0 Load Packages¶
In [ ]:
%load_ext autotime
time: 52.9 µs (started: 2024-04-26 03:53:46 -07:00)
In [ ]:
%load_ext watermark
%watermark
Last updated: 2024-04-26T03:53:55.133962-07:00 Python implementation: CPython Python version : 3.11.8 IPython version : 8.20.0 Compiler : GCC 12.3.0 OS : Linux Release : 4.18.0-477.15.1.el8_8.x86_64 Machine : x86_64 Processor : x86_64 CPU cores : 64 Architecture: 64bit time: 343 ms (started: 2024-04-26 03:53:54 -07:00)
In [ ]:
import polars as pl
import warnings
warnings.filterwarnings('ignore')
import os
from rich import print
import datetime
from sklearnex import patch_sklearn
patch_sklearn()
from sklearn.metrics import silhouette_score
from sklearn.cluster import KMeans
from scipy.spatial.distance import cdist
from tqdm import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ipywidgets as widgets
from ipywidgets import interact, interact_manual
# import panel as pn
# pn.extension()
Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)
time: 50 s (started: 2024-04-26 03:54:06 -07:00)
In [ ]:
%watermark --iversions
polars : 0.20.17 matplotlib: 3.8.2 pandas : None ipywidgets: 8.1.1 numpy : 1.26.3 time: 99 ms (started: 2024-04-26 03:55:01 -07:00)
1 Data and Filter¶
In [ ]:
# READ IN DATA and DESCRIBE
RATING_FILE_PATH = '../data/ratings.csv' # ml-32m latest curated dataset
df32m = pl.read_csv(RATING_FILE_PATH)
df32m = df32m.with_columns(
pl.col('movieId').cast(pl.Utf8).cast(pl.Categorical),
pl.col('userId').cast(pl.Utf8).cast(pl.Categorical),
pl.from_epoch(pl.col('timestamp'), time_unit='s')
)
with pl.Config(tbl_rows=20):
print(f"{df32m.describe(percentiles=[.01, .1, .2, .25, .3, .4, .5, .6, .7, .75, .8, .9, .99])=}")
df32m.describe(percentiles=[.01, .1, .2, .25, .3, .4, .5, .6, .7, .75, .8, .9, .99])=shape: (19, 5) ┌────────────┬──────────┬──────────┬─────────────┬────────────────────────────┐ │ statistic ┆ userId ┆ movieId ┆ rating ┆ timestamp │ │ --- ┆ --- ┆ --- ┆ --- ┆ --- │ │ str ┆ str ┆ str ┆ f64 ┆ str │ ╞════════════╪══════════╪══════════╪═════════════╪════════════════════════════╡ │ count ┆ 32000204 ┆ 32000204 ┆ 3.2000204e7 ┆ 32000204 │ │ null_count ┆ 0 ┆ 0 ┆ 0.0 ┆ 0 │ │ mean ┆ null ┆ null ┆ 3.540396 ┆ 2010-05-30 17:39:59.573263 │ │ std ┆ null ┆ null ┆ 1.058986 ┆ null │ │ min ┆ null ┆ null ┆ 0.5 ┆ 1995-01-09 11:46:44 │ │ 1% ┆ null ┆ null ┆ 0.5 ┆ 1996-06-10 17:26:52 │ │ 10% ┆ null ┆ null ┆ 2.0 ┆ 1999-11-17 10:23:31 │ │ 20% ┆ null ┆ null ┆ 3.0 ┆ 2001-08-10 16:24:23 │ │ 25% ┆ null ┆ null ┆ 3.0 ┆ 2003-04-22 11:53:50 │ │ 30% ┆ null ┆ null ┆ 3.0 ┆ 2004-10-16 05:15:06 │ │ 40% ┆ null ┆ null ┆ 3.5 ┆ 2007-01-26 19:22:44 │ │ 50% ┆ null ┆ null ┆ 3.5 ┆ 2010-04-30 10:03:49 │ │ 60% ┆ null ┆ null ┆ 4.0 ┆ 2015-02-01 22:28:39 │ │ 70% ┆ null ┆ null ┆ 4.0 ┆ 2016-10-13 13:27:36 │ │ 75% ┆ null ┆ null ┆ 4.0 ┆ 2017-08-19 15:59:05 │ │ 80% ┆ null ┆ null ┆ 4.5 ┆ 2018-10-03 07:21:40 │ │ 90% ┆ null ┆ null ┆ 5.0 ┆ 2020-11-05 19:45:35 │ │ 99% ┆ null ┆ null ┆ 5.0 ┆ 2023-06-28 02:27:13 │ │ max ┆ null ┆ null ┆ 5.0 ┆ 2023-10-13 02:29:07 │ └────────────┴──────────┴──────────┴─────────────┴────────────────────────────┘
time: 10.6 s (started: 2024-04-26 03:55:21 -07:00)
In [ ]:
@interact
########################### Filtering the Raw Dataset: remove obsolete ratings ###########################
#### Filter 1: Recent Ratings, timestamp > cutoff_data = 2014-01-01
#### Filter 2: Movies with more than cut_movie = 200 ratings
#### Filter 3: Users with more than cut_user = 40 ratings
#### We are filtering step by step to guarantee the result aligns with what we need. While filtering all at once
#### (all filters applied on df32m) will give us a much smaller dataset that is not useful.
def show_recent_ratings(cutoff_date = '2014-01-01',
cut_movie = widgets.IntSlider(min=1, max=3000, step=1, value=200),
cut_user = widgets.IntSlider(min=1, max=3000, step=1, value=40)):
# cutoff_date = '2014-01-01'
# cut_movie = 200
# cut_user = 40
df32m = pl.read_csv(RATING_FILE_PATH)
cutoff_date = int(datetime.datetime.strptime(cutoff_date, "%Y-%m-%d").replace(tzinfo=datetime.timezone.utc).timestamp())
f1_df = df32m.filter(pl.col('timestamp') >= cutoff_date)
movie_counts = f1_df.group_by('movieId').agg(pl.len().alias('count'))
filtered_movie_ids = movie_counts.filter(pl.col('count') >= cut_movie).select('movieId')
f2_df = f1_df.join(filtered_movie_ids, on='movieId', how='inner')
user_counts = f2_df.group_by('userId').agg(pl.len().alias('count'))
filtered_user_ids = user_counts.filter(pl.col('count') >= cut_user).select('userId')
df = f2_df.join(filtered_user_ids, on='userId', how='inner')
print(f"Original: \t{df32m.shape[0]:,}")
print(f"Cutting date: \t{f1_df.shape[0]:,}")
print(f"Cutting movie: \t{f2_df.shape[0]:,}")
print(f"Cutting user: \t{df.shape[0]:,}")
print(f"# of Ratings: \t{df.shape[0]:,}")
print(f"# of Users: \t{df['userId'].n_unique():,}")
print(f"# of Movies: \t{df['movieId'].n_unique():,}")
print(f"Sparsity: \t{1-df.shape[0]/df['userId'].n_unique()/df['movieId'].n_unique():.4n}")
print(f"User Ratings Count: \t{df['userId'].value_counts().describe()}")
print(f"Movie Ratings Count: \t{df['movieId'].value_counts().describe()}")
print(df)
interactive(children=(Text(value='2014-01-01', description='cutoff_date'), IntSlider(value=200, description='c…
time: 1.64 s (started: 2024-04-26 03:55:49 -07:00)
In [ ]:
# use the picked cutoff values for df and pt
cutoff_date = '2014-01-01'
cut_movie = 200
cut_user = 40
df32m = pl.read_csv(RATING_FILE_PATH)
cutoff_date = int(datetime.datetime.strptime(cutoff_date, "%Y-%m-%d").replace(tzinfo=datetime.timezone.utc).timestamp())
f1_df = df32m.filter(pl.col('timestamp') >= cutoff_date)
df32m = df32m.with_columns(pl.from_epoch(pl.col('timestamp'), time_unit='s'))
f1_df = f1_df.with_columns(pl.from_epoch(pl.col('timestamp'), time_unit='s'))
movie_counts = f1_df.group_by('movieId').agg(pl.len().alias('count')).sort('count', descending=True)
filtered_movie_ids = movie_counts.filter(pl.col('count') >= cut_movie).select('movieId')
f2_df = f1_df.join(filtered_movie_ids, on='movieId', how='inner')
user_counts = f2_df.group_by('userId').agg(pl.len().alias('count')).sort('count', descending=True)
filtered_user_ids = user_counts.filter(pl.col('count') >= cut_user).select('userId')
df = f2_df.join(filtered_user_ids, on='userId', how='inner')
time: 972 ms (started: 2024-04-26 03:58:27 -07:00)
In [ ]:
movie_counts = movie_counts.join(pl.read_csv('../data/movie_links_87461_title_genre.csv'), on='movieId', how='inner').sort('count', descending=True)
time: 304 ms (started: 2024-04-26 03:58:47 -07:00)
In [ ]:
movie_counts
Out[ ]:
shape: (83_882, 6)
| movieId | count | imdbId | tmdbId | title | genres |
|---|---|---|---|---|---|
| i64 | u32 | i64 | i64 | str | str |
| 79132 | 47695 | 1375666 | 27205 | "Inception (201… | "Action|Crime|D… |
| 2571 | 47209 | 133093 | 603 | "Matrix, The (1… | "Action|Sci-Fi|… |
| 318 | 44585 | 111161 | 278 | "Shawshank Rede… | "Crime|Drama" |
| 58559 | 42725 | 468569 | 155 | "Dark Knight, T… | "Action|Crime|D… |
| 2959 | 41295 | 137523 | 550 | "Fight Club (19… | "Action|Crime|D… |
| … | … | … | … | … | … |
| 292731 | 1 | 26812510 | 1032473 | "The Monroy Aff… | "Drama" |
| 292737 | 1 | 14907358 | 986674 | "Shelter in Sol… | "Comedy|Drama" |
| 292753 | 1 | 12388280 | 948139 | "Orca (2023)" | "Drama" |
| 292755 | 1 | 64027 | 182776 | "The Angry Bree… | "Drama" |
| 292757 | 1 | 28995566 | 1174725 | "Race to the Su… | "Action|Adventu… |
time: 3.96 ms (started: 2024-04-26 03:59:14 -07:00)
In [ ]:
user_counts
Out[ ]:
shape: (76_311, 2)
| userId | count |
|---|---|
| i64 | u32 |
| 175325 | 4985 |
| 22744 | 4473 |
| 17035 | 3984 |
| 15875 | 3898 |
| 43703 | 3837 |
| … | … |
| 170393 | 1 |
| 173537 | 1 |
| 46401 | 1 |
| 65405 | 1 |
| 123995 | 1 |
time: 1.83 ms (started: 2024-04-26 03:59:41 -07:00)
In [ ]:
filtered_user_ids
Out[ ]:
shape: (56_318, 1)
| userId |
|---|
| i64 |
| 175325 |
| 22744 |
| 17035 |
| 15875 |
| 43703 |
| … |
| 71434 |
| 57670 |
| 86246 |
| 164766 |
| 200812 |
time: 1.66 ms (started: 2024-04-26 04:00:10 -07:00)
In [ ]:
filtered_movie_ids
Out[ ]:
shape: (5_981, 1)
| movieId |
|---|
| i64 |
| 79132 |
| 2571 |
| 318 |
| 58559 |
| 2959 |
| … |
| 115967 |
| 251922 |
| 7976 |
| 67620 |
| 185997 |
time: 1.59 ms (started: 2024-04-26 05:54:07 -07:00)
In [ ]:
df.shape
Out[ ]:
(11727759, 4)
time: 1.37 ms (started: 2024-04-26 04:00:47 -07:00)
In [ ]:
#df.write_csv('../data/ratings_12m_filtered.csv')
time: 1.17 s (started: 2024-04-25 22:50:02 -07:00)
1.1 visualize the filtering process¶
1.1.1 the original df32m data¶
In [ ]:
%matplotlib inline
import plotly.express as px
# create a interactive 3D plot, with the x-axis being the 'timestamp', y-axis being the 'movieId', and z-axis being the 'userId'
# let the size of the points be the 'rating'
# add a plane to cut the plot in half, at timestamp=2014-01-01
# sample 1000 points to make the plot more interactive
plt_df = df32m.sample(1000).sort('timestamp')
plt_df = plt_df.with_columns(
pl.col('movieId').cast(pl.Int64),
pl.col('userId').cast(pl.Int64),
)
fig = px.scatter_3d(plt_df, x='timestamp', y='movieId', z='userId', size='rating', color='rating')
fig.update_traces(marker=dict(line=dict(width=4, color='DarkSlateGrey')))
fig.update_layout(scene = dict(
xaxis_title='timestamp',
yaxis_title='movieId',
zaxis_title='userId'),
showlegend=False)
fig.show()
time: 20.4 s (started: 2024-04-26 04:01:08 -07:00)
1.1.2 cut to 2014-01-01 and later, f1_df¶
In [ ]:
%matplotlib inline
import plotly.express as px
# create a interactive 3D plot, with the x-axis being the 'timestamp', y-axis being the 'movieId', and z-axis being the 'userId'
# let the size of the points be the 'rating'
# add a plane to cut the plot in half, at timestamp=2014-01-01
# sample 1000 points to make the plot more interactive
plt_df = f1_df.sample(1000).sort('timestamp')
plt_df = plt_df.with_columns(
pl.col('movieId').cast(pl.Int64),
pl.col('userId').cast(pl.Int64),
)
fig = px.scatter_3d(plt_df, x='timestamp', y='movieId', z='userId', size='rating', color='rating')
fig.update_traces(marker=dict(line=dict(width=4, color='DarkSlateGrey')))
fig.update_layout(scene = dict(
xaxis_title='timestamp',
yaxis_title='movieId',
zaxis_title='userId'),
showlegend=False)
fig.show()
time: 96.8 ms (started: 2024-04-26 04:01:47 -07:00)
1.1.3 cut to movies with at least 200 votes, f2_df¶
In [ ]:
%matplotlib inline
import plotly.express as px
# create a interactive 3D plot, with the x-axis being the 'timestamp', y-axis being the 'movieId', and z-axis being the 'userId'
# let the size of the points be the 'rating'
# add a plane to cut the plot in half, at timestamp=2014-01-01
# sample 1000 points to make the plot more interactive
plt_df = f2_df.sample(1000).sort('timestamp')
plt_df = plt_df.with_columns(
pl.col('movieId').cast(pl.Int64),
pl.col('userId').cast(pl.Int64),
)
fig = px.scatter_3d(plt_df, x='timestamp', y='movieId', z='userId', size='rating', color='rating')
fig.update_traces(marker=dict(line=dict(width=4, color='DarkSlateGrey')))
fig.update_layout(scene = dict(
xaxis_title='timestamp',
yaxis_title='movieId',
zaxis_title='userId'),
showlegend=False)
fig.show()
time: 41.2 ms (started: 2024-04-26 04:02:23 -07:00)
1.1.4 cut to users with at least 40 votes, df¶
In [ ]:
%matplotlib inline
import plotly.express as px
# create a interactive 3D plot, with the x-axis being the 'timestamp', y-axis being the 'movieId', and z-axis being the 'userId'
# let the size of the points be the 'rating'
# add a plane to cut the plot in half, at timestamp=2014-01-01
# sample 1000 points to make the plot more interactive
plt_df = df.sample(1000).sort('timestamp')
plt_df = plt_df.with_columns(
pl.col('movieId').cast(pl.Int64),
pl.col('userId').cast(pl.Int64),
)
fig = px.scatter_3d(plt_df, x='timestamp', y='movieId', z='userId', size='rating', color='rating')
fig.update_traces(marker=dict(line=dict(width=4, color='DarkSlateGrey')))
fig.update_layout(scene = dict(
xaxis_title='timestamp',
yaxis_title='movieId',
zaxis_title='userId'),
showlegend=False)
fig.show()
time: 41.5 ms (started: 2024-04-26 04:02:49 -07:00)
In [ ]:
# 2d plot
In [ ]:
%matplotlib inline
import plotly.express as px
# create a interactive 2D plot, with the x-axis being the 'timestamp', y-axis being the 'movieId', and z-axis being the 'userId'
# let the size of the points be the 'rating'
# add a plane to cut the plot in half, at timestamp=2014-01-01
# sample 1000 points to make the plot more interactive
plt_df = df.sample(1000).sort('timestamp')
plt_df = plt_df.with_columns(
pl.col('movieId').cast(pl.Int64),
pl.col('userId').cast(pl.Int64),
)
fig = px.scatter(plt_df, x='movieId', y='userId', size='rating', color='rating')
fig.update_traces(marker=dict(line=dict(width=4, color='DarkSlateGrey')))
fig.update_layout(scene = dict(
xaxis_title='movieId',
yaxis_title='userId'),
showlegend=False)
fig.show()
time: 1.1 s (started: 2024-04-26 04:03:20 -07:00)
1.2 Pivot Table¶
In [ ]:
pt = df.pivot(index="userId", columns="movieId", values="rating")
# sort most voted users and movies on top
X = pt.select(pl.col('userId'),pl.col(movie_counts.filter(pl.col("movieId").cast(str).is_in(pt.columns))["movieId"].cast(str)))
X = X.with_columns(pl.col('userId').cast(pl.Utf8).cast(pl.Categorical))
filtered_user_ids = filtered_user_ids.with_columns(pl.col('userId').cast(pl.Utf8).cast(pl.Categorical))
X = filtered_user_ids.join(X, on='userId', how='left')
X
Out[ ]:
shape: (56_318, 5_980)
| userId | 79132 | 2571 | 318 | 58559 | 2959 | 7153 | 4993 | 356 | 109487 | 5952 | 296 | 260 | 593 | 99114 | 1196 | 60069 | 68954 | 858 | 134130 | 91529 | 527 | 1198 | 68157 | 3578 | 72998 | 106782 | 112852 | 59315 | 4226 | 6539 | 6874 | 4306 | 33794 | 6377 | 74458 | 1704 | … | 6770 | 8928 | 64278 | 66304 | 98956 | 99106 | 116660 | 118760 | 138104 | 141648 | 158956 | 192109 | 193954 | 205857 | 281904 | 522 | 1081 | 2946 | 32792 | 55946 | 85790 | 93819 | 94867 | 97194 | 116668 | 169252 | 203619 | 208807 | 224983 | 81 | 3952 | 7976 | 67620 | 115967 | 127134 | 185997 | 251922 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| cat | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | … | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 |
| "175325" | 4.0 | 4.0 | 4.5 | 4.5 | 3.5 | 4.0 | 4.0 | 3.5 | 4.0 | 4.0 | 4.5 | 4.0 | 4.0 | 4.0 | 4.0 | 4.0 | 4.0 | 4.5 | 4.5 | 4.0 | 4.5 | 4.0 | 4.0 | 3.5 | 3.5 | 4.0 | 4.0 | 3.5 | 4.0 | 4.0 | 4.0 | 3.5 | 4.0 | 4.0 | 4.0 | 4.5 | … | 3.0 | 3.5 | 3.5 | null | 4.0 | 2.5 | 3.5 | 4.0 | null | 3.0 | null | 3.5 | 3.0 | null | null | 4.0 | 3.5 | 3.5 | 5.0 | 3.0 | 4.0 | 2.5 | 2.5 | null | 3.5 | 4.0 | 3.5 | null | null | 1.5 | 3.0 | 2.0 | null | 3.0 | 2.0 | 3.0 | null |
| "22744" | 4.0 | 5.0 | 3.0 | 5.0 | 5.0 | 5.0 | 5.0 | 4.0 | 4.0 | 5.0 | 5.0 | 5.0 | 5.0 | 3.0 | 5.0 | 4.0 | 4.0 | 5.0 | 4.0 | 5.0 | 0.5 | 5.0 | 3.0 | 3.0 | 3.0 | 4.0 | 4.0 | 4.0 | 4.0 | 4.0 | 5.0 | 5.0 | 5.0 | 5.0 | 3.0 | 3.0 | … | null | null | null | 2.5 | null | 4.0 | null | null | 2.0 | 2.0 | 1.0 | null | 0.5 | null | null | 0.5 | 1.0 | 2.0 | null | 0.5 | null | null | 3.0 | null | 2.0 | null | null | null | null | 2.0 | null | null | null | 2.0 | 2.0 | 3.0 | null |
| "17035" | 1.5 | 1.5 | 5.0 | 4.5 | 4.5 | 4.0 | 3.0 | 3.5 | 2.0 | 3.5 | 5.0 | 0.5 | 5.0 | 4.0 | 0.5 | 1.0 | 4.0 | 5.0 | 4.0 | 0.5 | 4.5 | 4.5 | 4.5 | 4.5 | 1.5 | 4.0 | 0.5 | 4.0 | 4.0 | null | 4.5 | 4.0 | 0.5 | 4.0 | 4.0 | 4.0 | … | 2.0 | 1.0 | null | 0.5 | null | 3.5 | null | 0.5 | null | 0.5 | null | null | null | 2.5 | null | 2.0 | 4.5 | null | null | 0.5 | 2.5 | 3.0 | 3.5 | null | null | null | null | 3.5 | null | 4.0 | 3.5 | null | 3.5 | 2.5 | 3.5 | null | null |
| "15875" | 3.0 | 5.0 | 5.0 | 4.0 | 5.0 | 4.0 | 4.0 | 3.5 | 4.0 | 4.0 | 5.0 | 4.0 | 3.5 | 4.5 | 4.5 | 3.0 | 3.5 | 4.0 | 5.0 | 4.0 | 4.0 | 4.0 | 5.0 | 2.5 | 3.0 | 4.0 | 4.0 | 4.0 | 5.0 | 3.0 | 4.0 | 4.0 | 3.5 | 3.5 | 4.0 | 2.5 | … | 3.0 | 3.0 | null | null | null | null | null | null | null | null | 2.5 | 4.0 | 2.0 | null | null | 4.0 | 3.0 | null | 3.0 | null | null | 3.0 | 1.5 | null | 3.5 | null | null | null | null | 3.0 | 1.5 | 3.5 | null | 3.0 | null | null | null |
| "43703" | 2.5 | 5.0 | 3.5 | 4.0 | 3.5 | 3.0 | 2.5 | 2.5 | 3.5 | 2.5 | 3.5 | 3.5 | 5.0 | 3.5 | 5.0 | 4.0 | 3.5 | 4.0 | 2.5 | 3.5 | 3.5 | 3.5 | 4.0 | 2.5 | 2.0 | 4.0 | 2.5 | 3.5 | 3.5 | 3.0 | 3.5 | 3.0 | 3.5 | 2.5 | 3.0 | 3.5 | … | 1.0 | 3.0 | 3.0 | 1.5 | null | null | null | null | null | null | null | 1.5 | null | 1.5 | null | 2.0 | null | null | 2.0 | null | 2.5 | 2.0 | null | null | 2.5 | null | 2.0 | null | 2.0 | null | null | null | null | 2.0 | null | null | 1.5 |
| … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … |
| "71434" | null | null | null | null | null | null | null | null | null | null | 5.0 | 4.5 | 5.0 | null | null | null | null | 5.0 | null | null | 5.0 | null | null | null | null | null | null | null | 5.0 | null | null | null | null | null | null | null | … | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null |
| "57670" | 4.5 | 4.5 | null | null | null | null | null | null | 4.5 | null | null | 4.5 | null | null | 5.0 | 5.0 | 4.5 | null | 4.5 | null | null | 4.0 | null | null | 4.0 | null | 4.5 | 4.5 | null | null | null | null | null | null | null | null | … | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null |
| "86246" | null | null | null | 5.0 | null | null | 3.0 | null | null | null | null | null | null | null | null | null | null | null | null | 4.0 | null | null | null | null | 4.0 | null | null | 4.0 | null | 3.5 | null | null | 4.5 | null | null | null | … | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null |
| "164766" | 4.5 | null | null | 4.5 | 4.0 | null | null | null | 4.5 | null | null | null | null | null | null | null | null | null | 4.5 | null | null | null | null | null | null | 4.0 | null | null | 4.5 | null | null | null | 4.0 | null | 4.0 | null | … | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null |
| "200812" | 4.0 | null | 5.0 | null | null | 4.5 | 4.5 | null | null | null | null | null | null | null | null | 2.0 | null | null | null | null | null | null | 4.0 | null | null | null | null | null | null | 3.0 | null | 2.5 | null | null | null | null | … | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null | null |
time: 3.1 s (started: 2024-04-26 04:07:08 -07:00)
In [ ]:
def calculate_sparsity(df: pl.DataFrame) -> float:
"""
Calculate the sparsity of a Polars DataFrame.
Args:
df (pl.DataFrame): The Polars DataFrame for which to calculate sparsity.
Returns:
float: The sparsity of the DataFrame.
"""
# Total elements is the number of rows multiplied by the number of columns
total_elements = df.height * df.width
# Count non-missing, non-zero values
missing = sum(df.null_count())
# Sparsity is 1 minus the proportion of non-zero, non-missing entries
sparsity = missing / total_elements
return sparsity
# Example usage with a dummy DataFrame:
dummy_data = pl.DataFrame({
'A': [0, 0, 1, None],
'B': [1, 2, 0, 0],
'C': [None, None, None, None]
})
calculate_sparsity(dummy_data)
Out[ ]:
shape: (1,)
| A |
|---|
| f64 |
| 0.416667 |
time: 2.27 ms (started: 2024-04-26 11:45:58 -07:00)
In [ ]:
print(calculate_sparsity(X[:10000,:]))
print(calculate_sparsity(X[:10000,:1001]))
shape: (1,) Series: 'userId' [f64] [ 0.895131 ]
shape: (1,) Series: 'userId' [f64] [ 0.646962 ]
time: 27.8 ms (started: 2024-04-26 11:47:22 -07:00)
2 Clustering¶
2.0 Over-simplifying the market as a whole¶
In [ ]:
# One Centroid represents the whole market. Each Movie's mean rating is its loading for this whole cluster
loadings = X.mean()
loadings
Out[ ]:
shape: (1, 5_980)
| userId | 79132 | 2571 | 318 | 58559 | 2959 | 7153 | 4993 | 356 | 109487 | 5952 | 296 | 260 | 593 | 99114 | 1196 | 60069 | 68954 | 858 | 134130 | 91529 | 527 | 1198 | 68157 | 3578 | 72998 | 106782 | 112852 | 59315 | 4226 | 6539 | 6874 | 4306 | 33794 | 6377 | 74458 | 1704 | … | 6770 | 8928 | 64278 | 66304 | 98956 | 99106 | 116660 | 118760 | 138104 | 141648 | 158956 | 192109 | 193954 | 205857 | 281904 | 522 | 1081 | 2946 | 32792 | 55946 | 85790 | 93819 | 94867 | 97194 | 116668 | 169252 | 203619 | 208807 | 224983 | 81 | 3952 | 7976 | 67620 | 115967 | 127134 | 185997 | 251922 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| cat | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | … | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 |
| null | 4.132816 | 4.11884 | 4.328511 | 4.133716 | 4.218658 | 4.03331 | 4.028187 | 4.076231 | 4.112761 | 4.01789 | 4.226428 | 3.957079 | 4.082426 | 4.008424 | 4.046046 | 3.978485 | 3.901311 | 4.219631 | 4.006142 | 3.933931 | 4.08114 | 3.934357 | 4.028735 | 3.956386 | 3.512107 | 3.918774 | 3.897736 | 3.749119 | 4.083559 | 3.688088 | 3.802861 | 3.59861 | 3.869268 | 3.739308 | 3.991113 | 4.135534 | … | 3.604061 | 3.3575 | 3.85 | 2.524752 | 3.645078 | 2.78 | 3.533854 | 3.638191 | 3.4075 | 3.194737 | 2.853535 | 3.8925 | 2.780612 | 3.310945 | 3.255051 | 3.382653 | 3.510417 | 3.435567 | 3.870466 | 2.756219 | 3.387755 | 3.28 | 2.95202 | 3.87 | 3.206468 | 3.992386 | 3.6675 | 3.4125 | 3.253769 | 3.035 | 3.407035 | 3.199482 | 3.492424 | 3.477273 | 3.25 | 2.721939 | 2.645729 |
time: 56.8 ms (started: 2024-04-26 05:18:12 -07:00)
In [ ]:
# finetuning for the whole market:
# {"system":"use this movie plot to predict the average market reaction in a scale from 0.5 to 5: {{plot}} ", "answer": "{{loading}}"}
# example:
# {"question": "use this move plot "}
2.1 Over-complicating, we have personalization¶
However, in the real world, we have 99.8% missing values such that we cannot reach personalization without introducing biases.
In [ ]:
2.2 Somewhere in the middle,¶
In [ ]:
# now we want to find a middle ground
# a K-group users, with distinction and representation
time: 210 µs (started: 2024-04-26 05:49:23 -07:00)
In [ ]:
df_with_titles = df.join(movie_counts, on='movieId')
Out[ ]:
shape: (11_727_113, 9)
| userId | movieId | rating | timestamp | count | imdbId | tmdbId | title | genres |
|---|---|---|---|---|---|---|---|---|
| i64 | i64 | f64 | datetime[μs] | u32 | i64 | i64 | str | str |
| 10 | 1320 | 3.0 | 2015-03-22 15:58:19 | 3390 | 103644 | 8077 | "Alien³ (a.k.a.… | "Action|Horror|… |
| 10 | 2003 | 3.5 | 2017-03-18 20:26:28 | 3527 | 87363 | 927 | "Gremlins (1984… | "Comedy|Horror" |
| 10 | 2985 | 3.0 | 2014-11-01 19:48:25 | 4936 | 93870 | 5548 | "RoboCop (1987)… | "Action|Crime|D… |
| 10 | 60040 | 2.5 | 2014-08-07 15:45:16 | 5593 | 800080 | 1724 | "Incredible Hul… | "Action|Sci-Fi" |
| 10 | 68791 | 2.0 | 2016-06-11 14:32:44 | 3984 | 438488 | 534 | "Terminator Sal… | "Action|Adventu… |
| … | … | … | … | … | … | … | … | … |
| 200947 | 116797 | 3.5 | 2017-05-28 12:02:34 | 24460 | 2084970 | 205596 | "The Imitation … | "Drama|Thriller… |
| 200947 | 122882 | 3.5 | 2017-05-28 12:13:52 | 21315 | 1392190 | 76341 | "Mad Max: Fury … | "Action|Adventu… |
| 200947 | 134853 | 5.0 | 2017-05-28 12:11:46 | 21483 | 2096673 | 150540 | "Inside Out (20… | "Adventure|Anim… |
| 200947 | 152081 | 3.0 | 2017-05-28 12:04:17 | 14431 | 2948356 | 269149 | "Zootopia (2016… | "Action|Adventu… |
| 200947 | 164909 | 4.5 | 2017-05-28 12:16:15 | 7231 | 3783958 | 313369 | "La La Land (20… | "Comedy|Drama|R… |
time: 67.4 ms (started: 2024-04-26 05:55:27 -07:00)
In [ ]:
df_with_titles = df.join(movie_counts, on='movieId')
time: 64.2 ms (started: 2024-04-26 06:03:52 -07:00)
In [ ]:
# to pandas
df_with_titles_pd = df_with_titles.to_pandas()
df_with_titles_pd = df_with_titles_pd.groupby('userId').apply(lambda x: ';'.join(['{} {}'.format(a, b) for a, b in zip(x['title'], x['rating'])])).reset_index().rename(columns={0:'pattern'})
df_with_titles_pd
Out[ ]:
| userId | pattern | |
|---|---|---|
| 0 | 10 | Alien³ (a.k.a. Alien 3) (1992) 3.0;Gremlins (1... |
| 1 | 16 | Braveheart (1995) 1.0;Bad Boys (1995) 1.5;Forr... |
| 2 | 20 | Toy Story (1995) 5.0;Twelve Monkeys (a.k.a. 12... |
| 3 | 22 | Star Wars: Episode IV - A New Hope (1977) 4.0;... |
| 4 | 23 | Toy Story (1995) 3.0;Star Wars: Episode IV - A... |
| ... | ... | ... |
| 56313 | 200933 | Dead Man Walking (1995) 4.5;Chungking Express ... |
| 56314 | 200943 | Art of War, The (2000) 1.5;Finding Forrester (... |
| 56315 | 200944 | Toy Story (1995) 4.0;Seven (a.k.a. Se7en) (199... |
| 56316 | 200945 | Get Shorty (1995) 4.0;Twelve Monkeys (a.k.a. 1... |
| 56317 | 200947 | Toy Story (1995) 4.0;Braveheart (1995) 4.5;Tax... |
56318 rows × 2 columns
time: 12.1 s (started: 2024-04-26 06:55:50 -07:00)
In [ ]:
df_with_titles_pd.pattern.apply(lambda x: len(x)).max()
Out[ ]:
148335
time: 18.2 ms (started: 2024-04-26 06:57:20 -07:00)
In [ ]:
df_56k_user_patterns = df_with_titles_pd.set_index('userId')
time: 32.8 ms (started: 2024-04-26 07:32:04 -07:00)
In [ ]:
# use openai embeddings to convert patterns into embeddings
In [ ]:
import latentscope as ls
# ls.init('../ls_embeddings',openai_key="sk-proj-GlQIJefzzR8hTixZXDO4T3BlbkFJFFDsaVyztKblDis7N4b3") # tier-1 limit
ls.init('../ls_embeddings',openai_key="sk-proj-ENEqiPGjjZ1JwkfBWibTT3BlbkFJ5jxzolr8Z8EFq1pptjRt") # tier-4 limit
ls.ingest('56k-users', df_56k_user_patterns, 'pattern')
Initialized env with data directory at ../ls_embeddings
Loading environment variables from: /dfs6/pub/mgu3/____ML/ml-32m/ML32M10K/code/.env
DATA DIR ../ls_embeddings
DIRECTORY ../ls_embeddings/56k-users
pattern
0 Alien³ (a.k.a. Alien 3) (1992) 3.0;Gremlins (1...
1 Braveheart (1995) 1.0;Bad Boys (1995) 1.5;Forr...
2 Toy Story (1995) 5.0;Twelve Monkeys (a.k.a. 12...
3 Star Wars: Episode IV - A New Hope (1977) 4.0;...
4 Toy Story (1995) 3.0;Star Wars: Episode IV - A...
pattern
56313 Dead Man Walking (1995) 4.5;Chungking Express ...
56314 Art of War, The (2000) 1.5;Finding Forrester (...
56315 Toy Story (1995) 4.0;Seven (a.k.a. Se7en) (199...
56316 Get Shorty (1995) 4.0;Twelve Monkeys (a.k.a. 1...
56317 Toy Story (1995) 4.0;Braveheart (1995) 4.5;Tax...
Index(['pattern'], dtype='object')
wrote ../ls_embeddings/56k-users/input.parquet
time: 2.83 s (started: 2024-04-26 07:32:44 -07:00)
wrote ../ls_embeddings/56k-users/input.parquet
time: 2.83 s (started: 2024-04-26 07:32:44 -07:00)
In [ ]:
%%capture
ls.serve()
time: 3h 51min 57s (started: 2024-04-26 07:32:56 -07:00)
In [ ]:
# load openai embeddings 001.h5 and do UMAP and HDBSCAN to get clusters.
# eyeball whether natural clusters exist
In [ ]:
In [ ]:
In [ ]:
2.2.1 Not Filling in Missings¶
2.2.1.1 Non-Negative Matrix Factorization¶
Singular Value Decomposition (SVD)¶
$$ \hat{r}_{ui} = q_i^T p_u $$ where user and item factors are kept positive.


In [ ]:
X_fill_mean = X.fill_null(strategy='mean')
X_fill_zero = X.fill_null(strategy='zero')
X_fill_min = X.fill_null(strategy='min')
time: 241 ms (started: 2024-04-26 04:10:16 -07:00)
In [ ]:
from sklearn.decomposition import NMF
A = X_fill_zero[:,1:]
# Initialize NMF and fit it to the matrix A
nmf = NMF(n_components=2, init='random', random_state=0)
W = nmf.fit_transform(A)
H = nmf.components_
# Print the resulting matrices
print(f"{W[:2]=}")
print(f"{H[:2]=}")
W: [[0. 0.83260542] [0. 2.49781625] [0. 3.33042167] [0. 4.16302709] [1.63417451 0.35754022] [1.97409297 0. ] [0.81708726 0.17877011]]
H: [[0. 0.44234624 0. 2.49355028 2.49355028] [1.19564514 1.21171687 1.19564514 0. 0. ]]
time: 21.4 ms (started: 2024-04-25 22:34:39 -07:00)
In [ ]:
from sklearn.decomposition import NMF
from sklearn.model_selection import GridSearchCV
import numpy as np
# Sample data (for example purposes, in practice your data will be different)
X = X_fill_zero[:, 1:]
# Parameters to search over
param_grid = {
'n_components': [10, 15, 20, 25, 30, 35, 40, 45, 50], # number of components
'init': ['random', 'nndsvd'], # initialization method
'l1_ratio': [0, 0.5, 1], # regularization mixing parameter
}
# Note: NMF does not have a 'score' method usually required by GridSearchCV
# Here we create a custom scorer function. In this case, it could be the NMF's
# reconstruction error itself. For the purpose of GridSearchCV, we need to
# make it a function that accepts the estimator, X, and y (which is ignored).
def nmf_score(estimator, X, y=None):
# Compute the reconstruction error, the lower, the better
reconstruction_error = np.linalg.norm(X - estimator.transform(X).dot(estimator.components_))
return -reconstruction_error # GridSearchCV maximizes the score so return the negative error
# Create the NMF instance
nmf = NMF()
# Create the GridSearchCV instance
grid_search = GridSearchCV(estimator=nmf, param_grid=param_grid, scoring=nmf_score, cv=3)
# Perform grid search (this may take some time depending on your data size and parameter grid)
grid_search.fit(X)
# Best parameters and score
best_params = grid_search.best_params_
best_score = grid_search.best_score_
print("Best parameters found:", best_params)
print("Best score found:", best_score)
Best parameters found: {'init': 'random', 'l1_ratio': 0, 'n_components': 10}
Best score found: nan
time: 1h 7min 56s (started: 2024-04-25 23:03:26 -07:00)
In [ ]:
from sklearn.decomposition import NMF
from sklearn.model_selection import GridSearchCV
import numpy as np
# Sample data (for example purposes, in practice your data will be different)
A = X_fill_zero[:, 1:]
# Parameters to search over
param_grid = {
'n_components': [8, 9,10, 11, 12, 13, 14, 15], # number of components
'init': ['random', 'nndsvd'], # initialization method
'l1_ratio': [0, 0.5, 1], # regularization mixing parameter
}
# Note: NMF does not have a 'score' method usually required by GridSearchCV
# Here we create a custom scorer function. In this case, it could be the NMF's
# reconstruction error itself. For the purpose of GridSearchCV, we need to
# make it a function that accepts the estimator, X, and y (which is ignored).
def nmf_score(estimator, X, y=None):
# Compute the reconstruction error, the lower, the better
reconstruction_error = np.linalg.norm(X - estimator.transform(X).dot(estimator.components_))
return -reconstruction_error # GridSearchCV maximizes the score so return the negative error
# Create the NMF instance
nmf = NMF()
# Create the GridSearchCV instance
grid_search = GridSearchCV(estimator=nmf, param_grid=param_grid, scoring=nmf_score, cv=3)
# Perform grid search (this may take some time depending on your data size and parameter grid)
grid_search.fit(A)
# Best parameters and score
best_params = grid_search.best_params_
best_score = grid_search.best_score_
print("Best parameters found:", best_params)
print("Best score found:", best_score)
Best parameters found: {'init': 'random', 'l1_ratio': 0, 'n_components': 8}
Best score found: nan
time: 33min 45s (started: 2024-04-26 04:13:30 -07:00)
In [ ]:
# NMF n=10
In [ ]:
from sklearn.decomposition import NMF
A = X_fill_zero[:,1:]
# Initialize NMF and fit it to the matrix A
nmf = NMF(n_components=10, init='random', random_state=0)
U = nmf.fit_transform(A)
M = nmf.components_
# Print the resulting matrices
print(f"{U[:2]=}")
print(f"{M[:2]=}")
U[:2]=array([[2.9517392 , 7.56836747, 0. , 0.60258417, 0.55196251, 0. , 0.02226424, 0.97733986, 0. , 4.84292076], [4.40771523, 9.0685571 , 0. , 0.53739161, 0.68046949, 0. , 0. , 0.61939399, 0.1306364 , 1.24826888]])
M[:2]=array([[0. , 0.09362535, 0. , ..., 0.00819534, 0. , 0.02706899], [0.13194742, 0.10950007, 0.15303912, ..., 0.01518143, 0.01983315, 0.00635778]])
time: 19.2 s (started: 2024-04-26 11:51:40 -07:00)
In [ ]:
# use U to cluster users into 10 groups
# jump to 2.3 KMeans
time: 184 µs (started: 2024-04-26 11:54:02 -07:00)
In [ ]:
print(np.load('../data/U.npy')[:2])
print(np.load('../data/M.npy')[:2])
[[2.9465047 7.57024189 0. 0.60239043 0.55344382 0. 0.02233019 0.9683429 0. 4.84344654] [4.39981346 9.07085948 0. 0.53721187 0.68230197 0. 0. 0.61367887 0.13100777 1.24841661]]
[[0. 0.09379923 0. ... 0.00821147 0. 0.02712022] [0.13191784 0.10947779 0.153014 ... 0.01517629 0.01982737 0.00635432]]
time: 51.5 ms (started: 2024-04-26 00:23:36 -07:00)
In [ ]:
# write U and M to disk,
np.save('../data/U.npy', U)
np.save('../data/M.npy', M)
time: 77.9 ms (started: 2024-04-26 00:21:33 -07:00)
In [ ]:
# grid search to find the best svd hyper-parameters
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from sklearn.pipeline import make_pipeline, make_union
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.metrics import f1_score, make_scorer
time: 437 µs (started: 2024-04-25 16:41:57 -07:00)
In [ ]:
# example SVD n_c=2:
data = X_fill_zero[:,1:]
# Initialize SVD with desired number of components
# For example, reduce dimensions to 2
svd = TruncatedSVD(n_components=2)
# Fit SVD on the data
svd.fit(data)
# Transform the data
reduced_data = svd.transform(data)
# Explained variance ratio of each component
explained_variance_ratio = svd.explained_variance_ratio_
# Explained variance
explained_variance = svd.explained_variance_
# Total variance explained by the two components
total_variance_explained = explained_variance_ratio.sum()
print("Explained Variance Ratio per Component:", explained_variance_ratio)
print("Explained Variance per Component:", explained_variance)
print("Total Variance Explained by all components:", total_variance_explained)
# Optionally, print reduced data shape and data
print("Shape of the reduced data:", reduced_data.shape)
print("Reduced data sample:", reduced_data[:5])
Explained Variance Ratio per Component: [0.09722155 0.03288487]
Explained Variance per Component: [237.2527247 80.24995381]
Total Variance Explained by all components: 0.13010641926714567
Shape of the reduced data: (56318, 2)
Reduced data sample: [[133.00376388 31.16510487] [121.15507989 56.95709056] [ 96.17317574 7.36524487] [121.23997846 23.14126167] [ 94.88499129 6.43712727]]
time: 2.24 s (started: 2024-04-25 20:45:46 -07:00)
In [ ]:
U0, S0, Vt0 = np.linalg.svd(X_fill_zero[:,1:], full_matrices=True)
print(f"{U0.shape=}")
print(f"{Vt0.shape=}")
print(f"{S0.shape=}")
print(f"{S0[:10]=}")
U0.shape=(56318, 56318)
Vt0.shape=(5979, 5979)
S0.shape=(5979,)
S0[:10]=array([6237.31217074, 2152.07968921, 2049.07343725, 1619.19600204, 1521.81169818, 1424.41457779, 1179.46393407, 1129.03339058, 1041.86336774, 977.87115844])
time: 5min 4s (started: 2024-04-25 21:43:00 -07:00)
In [ ]:
U, S, Vt = np.linalg.svd(X_fill_zero[:,1:], full_matrices=False)
print(f"{U.shape=}")
print(f"{Vt.shape=}")
print(f"{S.shape=}")
U.shape=(56318, 5979)
Vt.shape=(5979, 5979)
S.shape=(5979,)
time: 52.1 s (started: 2024-04-25 21:34:59 -07:00)
In [ ]:
print(S[:50]) # 50 most significant components
[6237.31217074 2152.07968921 2049.07343725 1619.19600204 1521.81169818 1424.41457779 1179.46393407 1129.03339058 1041.86336774 977.87115844 938.79926732 893.71094347 869.90878612 846.77842883 810.2193332 781.02267472 764.4849044 746.67560439 738.28771328 712.34653437 689.70132419 672.08246405 654.06569961 649.68152676 632.05935215 611.58689271 603.23255205 597.63074211 587.64633119 577.87482512 566.97381058 559.34838485 544.52478823 543.1516472 535.8183286 526.95349245 521.08248945 512.63979617 507.96459684 503.53502669 502.19393197 495.46971891 489.1205354 482.4307243 480.28695211 474.52375232 469.32982393 464.65911358 463.28967386 459.40606028]
time: 3.26 ms (started: 2024-04-25 21:48:11 -07:00)
Variations such as Truncated SVD, Random SVD, etc.¶
In [ ]:
# Truncated SVD 8,18,28,...,208:
data = X_fill_zero[:,1:]
# Initialize SVD with desired number of components
# For example, reduce dimensions to 2
for n_components in range(8, 208, 10):
svd = TruncatedSVD(n_components=n_components)
# Fit SVD on the data
svd.fit(data)
# Transform the data
reduced_data = svd.transform(data)
# Explained variance ratio of each component
explained_variance_ratio = svd.explained_variance_ratio_
# Explained variance
explained_variance = svd.explained_variance_
# Total variance explained by the two components
total_variance_explained = explained_variance_ratio.sum()
print('*'*8, f'{n_components=}','*'*8)
print("Explained Variance Ratio per Component:", explained_variance_ratio)
print("Explained Variance per Component:", explained_variance)
print("Total Variance Explained by all components:", total_variance_explained)
# Optionally, print reduced data shape and data
print("Shape of the reduced data:", reduced_data.shape)
print("Reduced data sample:", reduced_data[:2])
******** n_components=8 ********
Explained Variance Ratio per Component: [0.09722155 0.03288487 0.02784989 0.01907246 0.01673718 0.01475283 0.01010284 0.0092745 ]
Explained Variance per Component: [237.2527247 80.24995498 67.96294291 46.54311131 40.84425068 36.00178855 24.65426763 22.63284597]
Total Variance Explained by all components: 0.22789612304174614
Shape of the reduced data: (56318, 8)
Reduced data sample: [[133.00376387 31.16636601 123.42716109 15.45883823 -23.44536389 -6.45489709 12.82095864 48.62831849] [121.15507988 56.95791992 85.95726368 -8.90305672 5.83964536 -1.95302928 -8.05168437 6.24374895]]
******** n_components=18 ********
Explained Variance Ratio per Component: [0.09722155 0.03288487 0.02784989 0.01907246 0.01673716 0.01475282 0.01010298 0.00927505 0.0078448 0.00695542 0.00637767 0.00568503 0.00550605 0.00521344 0.00476878 0.00443415 0.00425041 0.00403835]
Explained Variance per Component: [237.2527247 80.24995645 67.96295032 46.54311305 40.84420683 36.00175104 24.65460742 22.63419888 19.14390046 16.97351371 15.5636117 13.8733453 13.43657565 12.72250805 11.63739085 10.82080127 10.37240324 9.85490876]
Total Variance Explained by all components: 0.28297086576977293
Shape of the reduced data: (56318, 18)
Reduced data sample: [[133.00376387 31.16636085 123.42720867 15.45900093 -23.44687303 -6.4565844 12.81649118 48.48953461 2.20274735 2.26620521 32.7787854 42.66543285 27.99113157 32.43558668 15.65286104 25.74843185 9.59038797 -7.43816194] [121.15507988 56.95798045 85.95741523 -8.90393668 5.8355789 -1.96477038 -8.10452038 6.01388268 31.98164497 13.15604454 15.88449945 31.07839057 18.77301111 26.18327316 2.27267186 7.98095153 14.79152468 12.94739324]]
******** n_components=28 ********
Explained Variance Ratio per Component: [0.09722155 0.03288487 0.02784989 0.01907246 0.01673716 0.01475282 0.01010298 0.00927506 0.0078448 0.00695542 0.00637769 0.00568513 0.00550618 0.00521393 0.00476938 0.0044347 0.00425096 0.00404262 0.00396348 0.00368588 0.00345731 0.00327683 0.00309716 0.00305912 0.00289585 0.00268953 0.00263987 0.00258719]
Explained Variance per Component: [237.2527247 80.24995645 67.96295032 46.54311307 40.84420644 36.0017507 24.65460609 22.63420225 19.1438969 16.97351415 15.56368026 13.87359867 13.43689672 12.72371436 11.63886411 10.82212789 10.3737554 9.86533288 9.6721969 8.99477401 8.43699095 7.99654311 7.55809708 7.46526093 7.06682571 6.56334561 6.44214799 6.31360583]
Total Variance Explained by all components: 0.3143298182170661
Shape of the reduced data: (56318, 28)
Reduced data sample: [[133.00376387 31.16636484 123.4272117 15.45903375 -23.4470334 -6.45676562 12.81544843 48.49115438 2.18343315 2.26291561 32.78573574 42.52659631 28.14083058 32.19747057 15.92075381 26.05363435 9.74021692 -6.89725431 6.57106597 1.08353349 0.63529727 -7.69918902 13.31487261 -4.99679251 18.24149368 -2.35083628 18.51751569 7.79806557] [121.15507988 56.95798555 85.95742172 -8.90386699 5.83569167 -1.96500169 -8.10534737 6.02032055 31.95596152 13.17854582 15.92157046 31.02292516 18.80600053 25.84258338 2.6278006 8.15649713 14.76282105 14.01963808 2.48819273 -4.05606637 13.95550766 0.52193685 -6.85257852 6.61923656 -8.78692127 -8.95955255 0.17326256 16.85387057]]
******** n_components=38 ********
Explained Variance Ratio per Component: [0.09722155 0.03288487 0.02784989 0.01907246 0.01673716 0.01475282 0.01010298 0.00927506 0.0078448 0.00695542 0.0063777 0.00568514 0.00550618 0.00521396 0.00476944 0.00443477 0.00425112 0.00404301 0.00396363 0.0036867 0.00345741 0.0032783 0.00310991 0.0030595 0.00290254 0.00271493 0.00264468 0.00258926 0.00250471 0.00242729 0.00232917 0.00226779 0.00215216 0.00213451 0.00206695 0.00199507 0.00192281 0.00186449]
Explained Variance per Component: [237.2527247 80.24995646 67.9629503 46.54311306 40.84420641 36.00175066 24.65460649 22.63420232 19.14389646 16.97351655 15.56368862 13.87361423 13.43689737 12.72377958 11.63900515 10.82229427 10.37414842 9.86628168 9.67256569 8.99676972 8.43722988 8.00014372 7.58921106 7.46618208 7.08316508 6.62532471 6.45390292 6.31864634 6.11230916 5.92340169 5.68395432 5.53414738 5.25197418 5.20890295 5.04403506 4.86862988 4.69230413 4.54998373]
Total Variance Explained by all components: 0.33605012797396866
Shape of the reduced data: (56318, 38)
Reduced data sample: [[133.00376387 31.16636467 123.42721112 15.45902867 -23.44702243 -6.45675433 12.81576212 48.49110527 2.1821811 2.26902253 32.78954314 42.51468098 28.13701326 32.20328497 15.95038119 26.07991622 9.75461692 -6.70120187 6.65322363 1.30376486 0.48761689 -7.7240546 12.17725229 -7.20091229 18.06477794 -4.29203813 19.76381899 6.64369691 12.4471184 7.1778797 15.53576508 16.65147115 11.56728922 -8.60793799 -4.66156454 14.70019297 13.07390359 -4.9849275 ] [121.15507988 56.95798553 85.9574215 -8.90387458 5.83568602 -1.96502385 -8.10539169 6.0205663 31.954539 13.17987514 15.92834629 31.0182058 18.80915723 25.84513447 2.62678745 8.19036588 14.74813615 14.05672546 2.43345142 -4.07784751 13.69944389 0.80554797 -3.85595569 8.55227622 -9.30079318 -10.8233526 1.32787331 16.25863232 -2.05962705 1.5755995 18.44027719 4.57085988 0.48190616 0.15928762 5.99198621 4.70183059 -3.83517568 -5.7695013 ]]
******** n_components=48 ********
Explained Variance Ratio per Component: [0.09722155 0.03288487 0.02784989 0.01907246 0.01673716 0.01475282 0.01010298 0.00927506 0.0078448 0.00695542 0.00637769 0.00568514 0.00550618 0.00521396 0.00476945 0.00443479 0.00425115 0.00404315 0.00396365 0.00368676 0.00345757 0.0032786 0.0031099 0.00305956 0.00290309 0.00271648 0.00264489 0.00258996 0.00250983 0.00242769 0.00233054 0.00227212 0.00215142 0.00214006 0.00208009 0.00201451 0.00195913 0.00190259 0.00186724 0.00182943 0.00182017 0.00175966 0.00170704 0.00168124 0.00163504 0.00156767 0.00154982 0.00152112]
Explained Variance per Component: [237.2527247 80.24995646 67.96295031 46.54311306 40.84420645 36.0017507 24.65460664 22.63420233 19.14389594 16.97351645 15.5636779 13.87362504 13.43690347 12.72377619 11.63903287 10.82234197 10.37420361 9.86662574 9.67261657 8.99691857 8.43761214 8.00086685 7.58918754 7.4663288 7.08448914 6.62911519 6.45439615 6.32036451 6.12481318 5.92436699 5.68728794 5.54471241 5.25017176 5.22244356 5.07609946 4.91608141 4.78091979 4.64295681 4.55667102 4.46441539 4.44181817 4.29416307 4.1657305 4.10279346 3.99002617 3.82563332 3.78207428 3.71204444]
Total Variance Explained by all components: 0.35311536156774
Shape of the reduced data: (56318, 48)
Reduced data sample: [[ 1.33003764e+02 3.11663647e+01 1.23427211e+02 1.54590262e+01 -2.34470212e+01 -6.45673358e+00 1.28157200e+01 4.84913044e+01 2.18286255e+00 2.26913226e+00 3.27904160e+01 4.25131018e+01 2.81339698e+01 3.21966896e+01 1.59421371e+01 2.61066873e+01 9.76826113e+00 -6.71389287e+00 6.65940693e+00 1.28758923e+00 6.05928991e-01 -7.81453799e+00 1.20437826e+01 -7.49976032e+00 1.76667883e+01 -4.35916922e+00 2.05399928e+01 7.29665522e+00 1.29165914e+01 7.09350112e+00 1.45003823e+01 1.52027317e+01 1.45472372e+01 -4.38977929e+00 -7.88311430e+00 1.52424586e+01 -1.13786291e+01 -7.27151954e+00 -9.44917435e+00 -5.06167208e+00 -2.29842672e+00 -1.16707910e+00 -5.51253350e+00 -4.73513903e+00 5.65140907e-01 -3.68477244e-01 -3.70733896e+00 8.95486666e+00] [ 1.21155080e+02 5.69579855e+01 8.59574217e+01 -8.90387650e+00 5.83569174e+00 -1.96499855e+00 -8.10548047e+00 6.02060856e+00 3.19554749e+01 1.31814635e+01 1.59294112e+01 3.10172516e+01 1.88036760e+01 2.58376024e+01 2.62828869e+00 8.22521775e+00 1.47655147e+01 1.40443612e+01 2.43725587e+00 -4.04568444e+00 1.38702621e+01 7.57576088e-01 -4.00046482e+00 8.30845980e+00 -9.74516516e+00 -1.10679932e+01 1.69858771e+00 1.59986096e+01 -2.00753011e+00 2.30116228e+00 1.90327524e+01 3.84830324e+00 2.27136182e+00 1.01725908e+00 3.95255755e+00 6.70212616e+00 -3.02273020e+00 8.28049835e+00 -6.85818163e+00 -1.43645782e+00 -7.79837632e+00 2.40284687e+00 7.50871245e-02 2.92361181e+00 2.64602556e+00 -4.46671664e+00 3.00380013e+00 1.56913484e+01]]
******** n_components=58 ********
Explained Variance Ratio per Component: [0.09722155 0.03288487 0.02784989 0.01907246 0.01673716 0.01475282 0.01010298 0.00927506 0.0078448 0.00695542 0.00637769 0.00568514 0.00550618 0.00521396 0.00476945 0.00443479 0.00425115 0.00404316 0.00396366 0.00368676 0.00345757 0.00327867 0.00310998 0.00305991 0.00290313 0.00271677 0.00264566 0.00259055 0.002512 0.00242943 0.00233391 0.00227492 0.00215505 0.00214467 0.00208487 0.00201668 0.00197051 0.00190182 0.00187564 0.00182998 0.00182778 0.00178112 0.0017252 0.00168409 0.00166803 0.00160952 0.00158726 0.00154985 0.00153487 0.00150022 0.00147067 0.0014341 0.00141603 0.00137657 0.0013574 0.00134466 0.0013307 0.0013161 ]
Explained Variance per Component: [237.2527247 80.24995646 67.96295031 46.54311306 40.84420642 36.00175069 24.65460662 22.63420233 19.14389657 16.97351591 15.56368115 13.87362439 13.43690321 12.72377723 11.6390236 10.822345 10.37420068 9.86664534 9.67263954 8.99691745 8.43762535 8.00104006 7.58938177 7.46718745 7.08458599 6.62982734 6.45627683 6.32180777 6.1301123 5.9286076 5.69550692 5.55156646 5.2590445 5.23370043 5.08776834 4.92137779 4.80870539 4.64106363 4.57718608 4.46574767 4.46037911 4.34653058 4.21005624 4.10974775 4.07054314 3.92774879 3.87344552 3.78214142 3.74558462 3.66103594 3.58892197 3.49968404 3.45558976 3.35929546 3.31250342 3.28140989 3.2473566 3.21171684]
Total Variance Explained by all components: 0.3674348405398642
Shape of the reduced data: (56318, 58)
Reduced data sample: [[ 1.33003764e+02 3.11663647e+01 1.23427211e+02 1.54590260e+01 -2.34470198e+01 -6.45674038e+00 1.28157209e+01 4.84912928e+01 2.18312604e+00 2.26879765e+00 3.27896494e+01 4.25139377e+01 2.81331331e+01 3.21968864e+01 1.59421061e+01 2.61147484e+01 9.77124399e+00 -6.71413290e+00 6.66646896e+00 1.28381988e+00 6.01649232e-01 -7.83571333e+00 1.20787403e+01 -7.41247954e+00 1.76241427e+01 -4.33811073e+00 2.03958127e+01 6.79434057e+00 1.29204228e+01 6.97056813e+00 1.53455369e+01 1.44247550e+01 1.23993415e+01 -8.00832783e+00 -6.21109410e+00 1.86479458e+01 -9.98947064e+00 -7.51494610e+00 -6.47810050e+00 -7.83906973e+00 -5.06798353e+00 2.70020664e+00 -8.27996753e+00 2.50870857e+00 3.88754578e+00 3.42435010e-01 1.64787133e+00 1.70752532e+01 1.87568897e+00 8.55961038e+00 -5.34009234e+00 6.19212344e-01 -3.50813741e+00 4.42900333e+00 1.15615517e+01 1.11572399e+00 1.74891150e+00 5.59351020e+00] [ 1.21155080e+02 5.69579855e+01 8.59574217e+01 -8.90387697e+00 5.83569036e+00 -1.96499799e+00 -8.10544382e+00 6.02059577e+00 3.19552345e+01 1.31809707e+01 1.59281478e+01 3.10156446e+01 1.88038075e+01 2.58382618e+01 2.62762086e+00 8.23966423e+00 1.47725637e+01 1.40510995e+01 2.42428838e+00 -4.04582362e+00 1.38458523e+01 7.30741421e-01 -3.96098188e+00 8.39054772e+00 -9.69573366e+00 -1.09963383e+01 1.70450178e+00 1.58714193e+01 -1.56484538e+00 1.77416292e+00 1.95693332e+01 2.69812012e+00 1.08052293e+00 -9.70316709e-02 4.13363914e+00 7.69732730e+00 -2.37516725e+00 9.95628089e+00 -4.73121944e+00 -8.49826885e+00 -1.13706006e+00 2.63902724e+00 5.12793656e+00 -4.36502034e+00 7.72387972e-01 2.41564086e+00 1.59137630e+00 8.46749223e+00 1.43045984e+01 8.52828134e+00 -1.33647159e+00 9.59675838e+00 -3.37656241e+00 3.50470822e+00 4.35619130e+00 -7.57429208e+00 -5.11842048e+00 2.64902911e+00]]
******** n_components=68 ********
Explained Variance Ratio per Component: [0.09722155 0.03288487 0.02784989 0.01907246 0.01673716 0.01475282 0.01010298 0.00927506 0.0078448 0.00695542 0.00637769 0.00568514 0.00550618 0.00521396 0.00476945 0.00443479 0.00425115 0.00404316 0.00396367 0.00368677 0.00345758 0.00327867 0.00310999 0.0030599 0.00290326 0.00271681 0.00264578 0.0025907 0.00251216 0.00242936 0.00233416 0.00227524 0.00215586 0.00214586 0.00208843 0.0020193 0.00197262 0.00190948 0.00187429 0.00183975 0.00182759 0.00178054 0.0017359 0.00168681 0.00166943 0.00162672 0.00158484 0.00155982 0.00154872 0.00152363 0.00149099 0.00145217 0.00144487 0.00140349 0.00137808 0.00135623 0.00135659 0.0013478 0.00134114 0.0013065 0.00128595 0.00127793 0.00124825 0.00122851 0.00121746 0.00119264 0.00117801 0.00116351]
Explained Variance per Component: [237.2527247 80.24995646 67.96295031 46.54311306 40.84420643 36.0017507 24.65460656 22.63420233 19.14389638 16.97351607 15.56368011 13.87362483 13.43690357 12.72377865 11.6390243 10.82235166 10.37420503 9.86664625 9.67265748 8.99692953 8.43764836 8.00104161 7.5894055 7.46715863 7.08490357 6.62992668 6.45657964 6.32215503 6.13050715 5.9284367 5.69613091 5.55232712 5.26100757 5.23661793 5.09646038 4.92775568 4.8138554 4.65976966 4.57388639 4.48960234 4.45992757 4.34511145 4.2361667 4.11637047 4.07395028 3.96974633 3.86753204 3.80647986 3.7793911 3.71815257 3.63850568 3.54376596 3.5259591 3.4249775 3.36297143 3.30965533 3.31052459 3.28907168 3.27281545 3.18828768 3.13814953 3.11856952 3.04613125 2.99797192 2.97099631 2.91042756 2.87473039 2.83935424]
Total Variance Explained by all components: 0.3801622564505494
Shape of the reduced data: (56318, 68)
Reduced data sample: [[133.00376387 31.16636469 123.42721126 15.4590261 -23.4470217 -6.45674035 12.81569949 48.49125458 2.18302168 2.26869038 32.78992237 42.51339924 28.1328793 32.19793767 15.94284915 26.11436957 9.77150086 -6.71940298 6.67085653 1.29990316 0.57888641 -7.82722865 12.11129346 -7.41192883 17.72385636 -4.32222464 20.37302141 7.04264904 13.00849527 6.93157302 15.81073169 14.54327869 10.81626036 -9.65554405 -7.23100314 17.5416964 -10.64694398 -8.45343359 -6.72909171 -8.19036715 1.0650004 -3.55529271 -6.62028684 2.2025157 2.48035287 5.55421003 4.8551586 14.08410094 -1.56948062 12.54953333 1.12863348 2.47813715 15.96875678 4.34725055 1.23446664 4.57268219 -1.96189848 1.67435323 -5.8139415 5.52021519 8.2714722 -5.65208598 1.6808753 5.10892235 2.15773093 0.6256852 -0.9338832 -10.45773759] [121.15507988 56.95798552 85.95742171 -8.90387669 5.83568906 -1.96500003 -8.10542793 6.02059964 31.95516845 13.18077455 15.92818963 31.01539018 18.80296448 25.83974193 2.62754137 8.23660068 14.7763301 14.04739504 2.4302134 -4.03626949 13.84168941 0.71763898 -3.95850514 8.38433251 -9.64280477 -11.00988161 1.7130614 16.04811962 -1.42645833 1.68209093 19.63088693 2.75530826 1.31373846 -0.15570905 3.94148655 7.33139875 -2.96553256 9.43097485 -4.49937295 -7.57500674 4.58316878 -1.07067129 5.27063107 -4.15856023 2.3661998 2.43382701 -0.15854762 9.00296063 12.92485048 9.90039807 5.90829368 5.65788644 12.22639727 8.07935105 -8.23821709 4.95213288 5.99040735 -0.72366237 0.75591105 3.30647793 2.34162538 -2.21933856 -1.08989839 1.54447159 -0.15227023 -10.67022032 0.6520758 -6.56765295]]
******** n_components=78 ********
Explained Variance Ratio per Component: [0.09722155 0.03288487 0.02784989 0.01907246 0.01673716 0.01475282 0.01010298 0.00927506 0.0078448 0.00695542 0.00637769 0.00568514 0.00550618 0.00521396 0.00476945 0.00443479 0.00425115 0.00404316 0.00396367 0.00368677 0.00345759 0.00327868 0.00311 0.0030599 0.00290326 0.00271682 0.00264584 0.00259078 0.00251234 0.00242955 0.00233449 0.00227561 0.00215692 0.00214623 0.00208872 0.00201949 0.00197273 0.001911 0.00187613 0.00183961 0.0018286 0.00178414 0.00173792 0.00168848 0.00167524 0.0016358 0.00159723 0.00156523 0.00154903 0.00152515 0.00149415 0.00145813 0.00144964 0.00141686 0.00140171 0.00139345 0.0013739 0.00136276 0.00134291 0.00134226 0.00132092 0.00129061 0.00128534 0.0012712 0.00123129 0.00122567 0.00121539 0.0012026 0.00119293 0.0011707 0.00116725 0.00114455 0.00112776 0.0011268 0.00111426 0.00109743 0.00107814 0.00107274]
Explained Variance per Component: [237.2527247 80.24995646 67.96295031 46.54311306 40.84420643 36.0017507 24.65460651 22.63420233 19.14389634 16.97351609 15.56368031 13.87362469 13.4369036 12.72377818 11.63902522 10.82235049 10.37420419 9.86664602 9.67265663 8.99693718 8.43765351 8.00105169 7.58941628 7.46716185 7.08491767 6.62992928 6.45672478 6.32236301 6.1309388 5.92890673 5.69692252 5.55323772 5.26358598 5.23750588 5.09717244 4.92822293 4.81411613 4.66348114 4.57837539 4.48926492 4.462385 4.35390308 4.24109547 4.12045239 4.08814872 3.99189531 3.89778023 3.81968939 3.78015468 3.72185899 3.6462218 3.55831469 3.53759506 3.45760364 3.42064788 3.40048641 3.3527617 3.32559597 3.27715415 3.27555936 3.22348463 3.14950798 3.13664474 3.10214704 3.00475287 2.99103523 2.96594708 2.93473376 2.91115504 2.85690304 2.84848638 2.79308675 2.75210285 2.74977199 2.71916706 2.67809932 2.63102798 2.61783076]
Total Variance Explained by all components: 0.3919128250530247
Shape of the reduced data: (56318, 78)
Reduced data sample: [[ 1.33003764e+02 3.11663647e+01 1.23427211e+02 1.54590267e+01 -2.34470219e+01 -6.45674029e+00 1.28157044e+01 4.84913038e+01 2.18302535e+00 2.26871303e+00 3.27900127e+01 4.25131529e+01 2.81330148e+01 3.21984629e+01 1.59442416e+01 2.61125646e+01 9.76997572e+00 -6.71292968e+00 6.66705953e+00 1.29743422e+00 5.87908527e-01 -7.83739564e+00 1.21125078e+01 -7.41886764e+00 1.77263435e+01 -4.38791784e+00 2.03556492e+01 6.99356935e+00 1.29670524e+01 6.91418527e+00 1.57212461e+01 1.43397071e+01 1.16746826e+01 -8.96467265e+00 -7.38509047e+00 1.70358725e+01 -1.03836753e+01 8.52071586e+00 -6.52661721e+00 -8.26015183e+00 -5.03111038e-01 -2.69745234e+00 -6.44515830e+00 1.94775552e+00 2.96785139e+00 5.63188059e+00 -3.64588825e+00 1.17986682e+01 -9.88994707e+00 9.95633024e+00 1.38937533e+00 1.37746187e+01 1.03420521e+01 5.45035442e+00 3.26087360e+00 -2.54052223e+00 8.28502966e-01 -1.64934265e+00 9.18147987e+00 2.53533857e+00 1.83436851e+00 -1.45259208e+00 -5.80510995e-01 -7.43757163e-01 8.37880798e+00 -1.14622026e+00 3.12044176e+00 3.74032926e-01 -5.76775146e+00 -7.56045473e+00 9.78447293e+00 4.90530604e+00 -3.50472914e-01 -8.81662130e+00 -8.10624973e+00 2.43327445e+00 3.96817570e+00 4.87285778e+00] [ 1.21155080e+02 5.69579855e+01 8.59574217e+01 -8.90387661e+00 5.83568951e+00 -1.96499983e+00 -8.10544285e+00 6.02062922e+00 3.19552539e+01 1.31808064e+01 1.59284810e+01 3.10155061e+01 1.88029928e+01 2.58397702e+01 2.62937022e+00 8.23569564e+00 1.47756376e+01 1.40475816e+01 2.42798432e+00 -4.03874354e+00 1.38391293e+01 7.11469617e-01 -3.92840983e+00 8.38746654e+00 -9.63833757e+00 -1.10353616e+01 1.77178537e+00 1.60407903e+01 -1.53776977e+00 1.73779376e+00 1.97361322e+01 2.67388030e+00 1.39654939e+00 -9.97914014e-02 4.00732379e+00 7.22602102e+00 -2.87117331e+00 -9.02345130e+00 -5.19244094e+00 -8.25250696e+00 3.23706310e+00 -1.89942083e+00 5.65319802e+00 -5.26757538e+00 1.55386364e+00 1.68048333e+00 -3.39129404e+00 1.16356258e+01 4.98135217e+00 9.27520948e+00 8.48315961e+00 1.53127045e+01 4.45224916e+00 6.25038023e+00 2.21803778e+00 -9.66022636e-01 -1.00354197e+01 -1.00472770e+00 -8.34027398e-01 -2.15660086e+00 3.03384166e+00 -6.91157523e+00 -3.64102465e+00 7.67848991e+00 5.60308928e+00 9.66094926e-01 -4.61422692e+00 -3.18864082e+00 1.52484735e+00 1.90740418e+00 4.16148082e+00 -2.32143338e+00 -4.11922381e+00 -5.84036033e+00 1.29524018e+00 1.81304966e+00 -1.19291889e+00 -3.19046341e+00]]
******** n_components=88 ********
Explained Variance Ratio per Component: [0.09722155 0.03288487 0.02784989 0.01907246 0.01673716 0.01475282 0.01010298 0.00927506 0.0078448 0.00695542 0.00637769 0.00568514 0.00550618 0.00521396 0.00476945 0.00443479 0.00425115 0.00404316 0.00396367 0.00368677 0.00345759 0.00327868 0.00311001 0.00305991 0.00290326 0.00271682 0.00264588 0.00259079 0.00251234 0.00242957 0.00233459 0.00227565 0.00215698 0.00214635 0.00208872 0.00201995 0.00197339 0.00191128 0.00187671 0.00184307 0.00182767 0.00178512 0.00173918 0.00168906 0.00167691 0.00163592 0.00159815 0.00156639 0.00155669 0.00153008 0.00149735 0.0014624 0.00145607 0.0014367 0.00140309 0.00140124 0.00138648 0.00135955 0.00135009 0.0013447 0.00133499 0.00129989 0.0012907 0.00128208 0.00124414 0.00124092 0.00122589 0.00121868 0.00120045 0.00118186 0.00118274 0.00116699 0.00116191 0.00114668 0.00113218 0.00112973 0.00111601 0.0011072 0.00109184 0.00107954 0.0010654 0.00105477 0.00104534 0.00103727 0.00102835 0.00102582 0.0010127 0.00101014]
Explained Variance per Component: [237.2527247 80.24995646 67.96295031 46.54311306 40.84420643 36.0017507 24.65460655 22.63420233 19.14389613 16.97351609 15.56367971 13.87362355 13.43690362 12.72377808 11.63902496 10.82235054 10.37420416 9.86664524 9.67265686 8.99693974 8.43765202 8.00106172 7.58944722 7.46719544 7.08492053 6.62994577 6.45682147 6.32237225 6.13093222 5.92895239 5.69717585 5.55333365 5.26373929 5.23780069 5.09715797 4.9293517 4.81572219 4.66415025 4.5797878 4.49770039 4.46012618 4.35629007 4.24416613 4.12186078 4.09222371 3.99217483 3.90001226 3.82251524 3.79884244 3.73389088 3.65403782 3.56874144 3.55328542 3.50602718 3.42401534 3.41948886 3.3834668 3.31775717 3.29465432 3.28150647 3.25780635 3.17214936 3.14973342 3.1286898 3.03611489 3.02824314 2.99158897 2.97397809 2.92948965 2.88412403 2.88627651 2.84784593 2.83543659 2.79827213 2.7628905 2.7569104 2.72342872 2.7019342 2.66445729 2.63444451 2.59991846 2.57397928 2.55098508 2.5312844 2.50951384 2.50333857 2.47133477 2.46506886]
Total Variance Explained by all components: 0.40277750234220333
Shape of the reduced data: (56318, 88)
Reduced data sample: [[ 1.33003764e+02 3.11663647e+01 1.23427211e+02 1.54590266e+01 -2.34470214e+01 -6.45674004e+00 1.28157058e+01 4.84912886e+01 2.18299125e+00 2.26885360e+00 3.27898200e+01 4.25132952e+01 2.81327383e+01 3.21980696e+01 1.59420854e+01 2.61143592e+01 9.77186610e+00 -6.71932812e+00 6.67011620e+00 1.29765548e+00 5.82982098e-01 -7.83825349e+00 1.21250563e+01 -7.41438848e+00 1.77287123e+01 -4.35783685e+00 2.03671722e+01 6.96912314e+00 1.29781018e+01 6.90685347e+00 1.56755773e+01 1.43082328e+01 1.17297021e+01 -8.95007334e+00 -7.18824498e+00 1.74006075e+01 -1.08273696e+01 8.80860788e+00 -6.28546260e+00 -8.16961832e+00 1.82924842e+00 3.05576217e+00 -6.51275880e+00 2.01177956e+00 2.72819434e+00 5.59212891e+00 2.71066111e+00 1.16420373e+01 6.56869585e+00 1.43236161e+01 -1.75729717e+00 1.32629060e+01 9.09901688e+00 3.09852488e+00 2.28953298e+00 4.82499752e+00 2.01200383e+00 4.60915636e+00 5.69290873e+00 3.20282258e+00 -4.04025227e+00 3.00396754e+00 -4.07415388e+00 2.40995855e+00 8.59577646e+00 1.62576482e+00 -3.68352903e+00 -3.46467973e+00 -9.24861211e+00 7.28150917e-01 -1.91941455e+00 -6.38298211e-01 -1.70312630e+00 1.43010126e+01 -3.59225666e+00 6.81044892e+00 -2.83022984e+00 5.65127737e+00 4.92951599e+00 -1.59541404e+00 1.40457527e+00 -2.65838771e+00 6.47252637e+00 2.40473514e+00 5.40890054e+00 5.11752416e+00 2.40625659e+00 1.89405828e+00] [ 1.21155080e+02 5.69579855e+01 8.59574217e+01 -8.90387647e+00 5.83568905e+00 -1.96499938e+00 -8.10543676e+00 6.02060297e+00 3.19552288e+01 1.31808479e+01 1.59283297e+01 3.10155986e+01 1.88028267e+01 2.58402154e+01 2.62835287e+00 8.23563563e+00 1.47747677e+01 1.40468709e+01 2.42815610e+00 -4.03491011e+00 1.38387310e+01 7.15527761e-01 -3.92756666e+00 8.39606257e+00 -9.62755301e+00 -1.10691356e+01 1.79837917e+00 1.60463724e+01 -1.52544453e+00 1.71792950e+00 1.97056359e+01 2.74082185e+00 1.47926423e+00 -1.57324119e-02 4.10462542e+00 7.08735990e+00 -2.99235363e+00 -8.90795153e+00 -5.25407722e+00 -7.35262030e+00 5.33690666e+00 1.98391335e+00 5.31596494e+00 -4.13619434e+00 1.55949980e+00 2.70097324e+00 2.47551196e+00 9.85054617e+00 -9.67941507e+00 9.93597382e+00 -6.80843800e+00 1.47661253e+01 4.27253881e+00 6.87384635e+00 -1.30281449e+00 3.97993991e+00 -8.72409187e+00 -6.29544938e-02 -1.13728620e-01 4.80491257e-02 2.59818037e+00 6.07907396e+00 -1.92555331e-01 7.75986741e+00 2.77484158e+00 -6.07760242e+00 -6.72337662e+00 1.00910226e-01 2.71628316e+00 3.13010081e+00 -3.79657142e-01 -8.96463392e-01 -4.90361304e+00 3.84090613e+00 -2.14792211e+00 1.13814818e+01 3.29898831e-01 -5.30162753e-01 1.32228851e-01 -4.41752629e+00 4.13138716e+00 4.68430606e+00 -3.49290479e+00 -1.71740355e+00 2.82982760e+00 6.76004203e-01 3.64081663e+00 -1.00931616e+00]]
******** n_components=98 ********
Explained Variance Ratio per Component: [0.09722155 0.03288487 0.02784989 0.01907246 0.01673716 0.01475282 0.01010298 0.00927506 0.0078448 0.00695542 0.00637769 0.00568514 0.00550618 0.00521396 0.00476945 0.00443479 0.00425115 0.00404316 0.00396367 0.00368677 0.00345759 0.00327868 0.00311001 0.00305991 0.00290327 0.00271684 0.00264587 0.00259078 0.00251238 0.00242959 0.00233457 0.00227573 0.00215701 0.00214647 0.0020889 0.00202025 0.00197347 0.0019113 0.00187686 0.00184365 0.00182852 0.00178515 0.00173996 0.00169067 0.00167726 0.0016374 0.00160085 0.00156581 0.00155726 0.00153265 0.00150056 0.00146652 0.00145768 0.00143576 0.00141066 0.00140393 0.00139031 0.00137041 0.00135676 0.0013516 0.00132942 0.00130205 0.00129577 0.00128443 0.00125565 0.00124496 0.00123854 0.00122756 0.00120598 0.00119281 0.00119058 0.00118158 0.00116947 0.00115395 0.00114139 0.00113478 0.00113253 0.00111872 0.00109394 0.00108645 0.00108241 0.00107142 0.00105757 0.00104442 0.00104141 0.00103467 0.00103002 0.00101543 0.00100607 0.00100711 0.00099963 0.00097883 0.00097341 0.00096371 0.00095674 0.00095451 0.00094667 0.00093631]
Explained Variance per Component: [237.2527247 80.24995646 67.96295031 46.54311306 40.84420643 36.0017507 24.65460655 22.63420233 19.14389608 16.97351607 15.56368012 13.87362394 13.43690363 12.7237784 11.6390251 10.82235155 10.37420507 9.86664546 9.67265877 8.99693894 8.43765582 8.00106686 7.5894459 7.46718202 7.08493583 6.62999688 6.45679253 6.32235033 6.13103078 5.9290146 5.69711292 5.55353686 5.2638248 5.23809733 5.09759492 4.93009007 4.81592312 4.66420388 4.58016292 4.49910483 4.46219147 4.35636838 4.24608437 4.12580106 4.09307697 3.995794 3.90659415 3.82108499 3.80022732 3.74017673 3.661852 3.57878419 3.55722027 3.50373621 3.44246907 3.4260538 3.39282042 3.34426306 3.31094968 3.29836203 3.2442172 3.17743409 3.16210925 3.13443136 3.06419706 3.03810984 3.02244199 2.99564517 2.9429994 2.9108539 2.90541125 2.88344565 2.8539023 2.81601229 2.78536561 2.76924586 2.76374727 2.73004673 2.66956587 2.65130146 2.64144947 2.6146117 2.58082624 2.54871961 2.54139335 2.52492997 2.51359478 2.4779969 2.45514178 2.45767491 2.43942473 2.38867023 2.37543431 2.35177858 2.33475201 2.32930931 2.31019566 2.28489888]
Total Variance Explained by all components: 0.4128026938858566
Shape of the reduced data: (56318, 98)
Reduced data sample: [[ 1.33003764e+02 3.11663647e+01 1.23427211e+02 1.54590265e+01 -2.34470222e+01 -6.45674048e+00 1.28156979e+01 4.84912826e+01 2.18301784e+00 2.26886536e+00 3.27899127e+01 4.25133299e+01 2.81327905e+01 3.21984368e+01 1.59423152e+01 2.61145736e+01 9.76944773e+00 -6.71805837e+00 6.66886362e+00 1.29677533e+00 5.83502571e-01 -7.83559412e+00 1.21145516e+01 -7.41694650e+00 1.77250099e+01 -4.36771330e+00 2.03464385e+01 6.97902421e+00 1.29571015e+01 6.94753682e+00 1.56993892e+01 1.44045905e+01 1.16952604e+01 -8.87728524e+00 -7.14940167e+00 1.72671872e+01 -1.06650979e+01 -8.61512760e+00 -6.20993967e+00 -8.40733278e+00 1.16423283e+00 2.68448516e+00 -7.20516981e+00 1.57878191e+00 3.09221869e+00 5.52260347e+00 -2.62554092e+00 1.28363518e+01 6.80016365e+00 1.25103152e+01 1.21391874e+00 1.49507871e+01 6.79871241e+00 4.71498985e+00 3.63592167e+00 3.40985451e+00 2.79283624e+00 -4.87978631e+00 5.35499645e+00 -3.81660570e+00 -3.21011348e+00 2.06169172e+00 2.97779522e+00 2.60037628e+00 3.49345823e+00 -5.94792019e-01 9.27237459e+00 2.54790531e+00 1.31464042e+00 -5.85745583e+00 1.09477316e+01 -1.57992999e-01 -7.20274544e+00 1.06349517e+01 2.00049927e+00 1.29766698e+01 -6.49104370e+00 -2.31903136e-01 -3.36271938e+00 4.60502119e+00 9.52167707e-01 2.82649141e+00 4.39742261e+00 8.19301027e-01 -2.41695071e-01 4.51604788e+00 6.74154786e+00 -9.04671600e-01 2.78714732e+00 -2.00026852e+00 3.95708693e+00 -3.97165651e+00 -3.70959734e+00 -1.44814960e+00 -2.28116922e+00 -5.05927254e-01 -1.85025715e+00 -1.04107779e+00] [ 1.21155080e+02 5.69579855e+01 8.59574217e+01 -8.90387659e+00 5.83568894e+00 -1.96499977e+00 -8.10543466e+00 6.02060750e+00 3.19552440e+01 1.31808340e+01 1.59283820e+01 3.10155069e+01 1.88029106e+01 2.58404392e+01 2.62848275e+00 8.23559556e+00 1.47737921e+01 1.40480854e+01 2.42921675e+00 -4.03642256e+00 1.38382040e+01 7.14881617e-01 -3.93024781e+00 8.39190284e+00 -9.63231500e+00 -1.10480238e+01 1.78004086e+00 1.60641765e+01 -1.54402527e+00 1.72913747e+00 1.96928364e+01 2.74981370e+00 1.45598938e+00 -5.91931582e-02 4.09055817e+00 7.18905323e+00 -3.08507336e+00 8.92162116e+00 -5.04558640e+00 -7.68590707e+00 4.87843206e+00 1.80461866e+00 5.10221842e+00 -4.63447061e+00 1.42553917e+00 2.75074644e+00 -1.45009834e+00 1.05989497e+01 -9.98839966e+00 9.92239163e+00 6.09275545e+00 1.55224930e+01 7.55303120e-01 6.71616359e+00 3.69074137e+00 1.72041113e+00 -7.24776377e+00 8.20527069e-01 3.22798489e+00 4.37996069e-01 3.37962014e+00 6.07974060e+00 -5.43361244e-01 9.03912281e+00 5.27582294e+00 6.89957495e+00 4.56540290e+00 4.12917348e-01 4.26989726e+00 -2.54322998e+00 -3.85973296e+00 4.04186026e+00 -7.62421570e-01 1.88288357e+00 7.57638154e-01 7.30808771e+00 -1.57296175e+00 -1.57769161e+00 4.07560712e+00 1.73142650e+00 1.75658183e+00 3.20713950e+00 -3.01886724e+00 -5.54619567e+00 1.07150577e+00 -3.10153157e+00 -4.90577205e+00 2.63201194e+00 2.64216189e+00 2.04368360e+00 5.32531055e+00 5.98395203e+00 -6.38830544e+00 1.29034415e+00 1.47415969e+00 -5.14515947e+00 2.79036165e+00 3.04542267e+00]]
******** n_components=108 ********
Explained Variance Ratio per Component: [0.09722155 0.03288487 0.02784989 0.01907246 0.01673716 0.01475282 0.01010298 0.00927506 0.0078448 0.00695542 0.00637769 0.00568514 0.00550618 0.00521396 0.00476945 0.00443479 0.00425115 0.00404316 0.00396367 0.00368677 0.00345759 0.00327868 0.00311001 0.00305991 0.00290327 0.00271684 0.00264588 0.00259079 0.00251238 0.00242959 0.0023346 0.00227573 0.0021571 0.00214649 0.00208891 0.00202033 0.00197363 0.0019116 0.00187713 0.00184359 0.00182882 0.00178564 0.00174026 0.0016909 0.00167782 0.00163779 0.00160105 0.001569 0.00155738 0.00153334 0.00150296 0.00146847 0.00146065 0.00143828 0.00141224 0.0014069 0.00139113 0.00137201 0.00136138 0.00135531 0.00134052 0.00130835 0.00130178 0.0012897 0.0012616 0.00125046 0.00124126 0.0012364 0.00121986 0.0012137 0.00120196 0.0011873 0.00117532 0.00116058 0.00114771 0.00114721 0.00113804 0.00112236 0.00111935 0.00111324 0.00109285 0.00108954 0.0010746 0.00106642 0.00105688 0.00105103 0.00104016 0.00103661 0.00102646 0.00101452 0.00100923 0.00100505 0.00099556 0.00099097 0.00098023 0.00097421 0.00097154 0.00096716 0.00096127 0.00094973 0.00094137 0.0009333 0.00092764 0.00092798 0.00092083 0.00090716 0.00090368 0.00089446]
Explained Variance per Component: [237.2527247 80.24995646 67.96295031 46.54311306 40.84420643 36.0017507 24.65460654 22.63420233 19.14389612 16.97351608 15.56367997 13.87362356 13.43690362 12.72377837 11.63902546 10.82235183 10.37420569 9.86664478 9.67265801 8.99693917 8.43765693 8.00106777 7.58945145 7.46718469 7.08493241 6.62999311 6.45681334 6.32238847 6.13104858 5.92901325 5.69720432 5.55352588 5.2640452 5.23813836 5.09762088 4.93028535 4.81630924 4.66492972 4.58081445 4.4989735 4.46292713 4.35754346 4.24680078 4.12634796 4.09443007 3.99673874 3.90708502 3.8288829 3.80052096 3.7418613 3.66772454 3.58356141 3.56446247 3.50986908 3.44632694 3.43330571 3.39481541 3.34814609 3.32221546 3.30739736 3.27132411 3.19281241 3.17677541 3.14728775 3.07871096 3.05154466 3.02909067 3.01723063 2.97686307 2.96182221 2.93317139 2.89739615 2.86816844 2.83219123 2.80078332 2.79956381 2.77718193 2.73893126 2.73158381 2.71666884 2.66692542 2.65884015 2.6223728 2.60242215 2.57914175 2.56485218 2.53832281 2.52967708 2.50489926 2.4757665 2.46286448 2.4526504 2.42948573 2.41830249 2.39208086 2.37740001 2.37088143 2.36019591 2.34582717 2.31766568 2.29724409 2.27754882 2.26375655 2.26457129 2.24712139 2.21377598 2.20527095 2.18277008]
Total Variance Explained by all components: 0.422641432626354
Shape of the reduced data: (56318, 108)
Reduced data sample: [[ 1.33003764e+02 3.11663647e+01 1.23427211e+02 1.54590265e+01 -2.34470219e+01 -6.45674014e+00 1.28157041e+01 4.84912864e+01 2.18299980e+00 2.26877792e+00 3.27898774e+01 4.25132593e+01 2.81328885e+01 3.21980142e+01 1.59421893e+01 2.61138611e+01 9.76911861e+00 -6.71669276e+00 6.66901407e+00 1.29772692e+00 5.85521944e-01 -7.83515734e+00 1.21179720e+01 -7.41355481e+00 1.77278400e+01 -4.34881231e+00 2.03608487e+01 6.98646164e+00 1.29641272e+01 6.92288682e+00 1.57071066e+01 1.43356884e+01 1.16913918e+01 -8.81023215e+00 -7.20199227e+00 1.73017182e+01 -1.05749286e+01 8.69369015e+00 -6.14591075e+00 -8.41755036e+00 1.00274803e+00 -2.79077658e+00 -7.06057225e+00 1.88142717e+00 3.06854215e+00 5.42060953e+00 -3.04382336e+00 1.09516315e+01 7.86766109e+00 1.32423426e+01 -1.17841287e+00 1.47279612e+01 7.25588657e+00 4.64469517e+00 3.76671576e+00 -1.73076590e+00 2.33999465e+00 -3.55424691e+00 3.83342072e+00 5.99063081e+00 -4.00408482e+00 3.82716070e+00 2.50203456e+00 1.32719069e+00 -3.12889026e+00 -3.01913902e+00 -7.81818506e+00 -3.90505998e+00 -9.77260919e+00 -6.93445841e+00 -2.94157759e+00 3.17467824e+00 3.97917095e+00 2.77570529e+00 3.01744001e+00 1.64077575e+01 -8.87271116e+00 -2.62054482e+00 2.30407370e+00 4.74453433e+00 -1.41269625e+00 -2.79216583e+00 6.37523092e+00 -9.70748009e-01 3.38644900e+00 8.53306476e-01 -8.64297221e-01 -2.74005570e-01 1.15670480e+01 1.44913615e+00 1.52807514e+00 2.27564985e+00 4.79446939e+00 1.64901046e+00 3.08146387e-01 -3.12917533e+00 6.85386496e-01 -4.47581543e+00 1.20036590e+00 5.11643270e+00 2.77614158e+00 3.83486856e-01 -1.03350597e+00 -5.30811067e-01 3.90524680e+00 -1.31223755e+00 7.49673357e+00 -2.95417320e+00] [ 1.21155080e+02 5.69579855e+01 8.59574217e+01 -8.90387657e+00 5.83568896e+00 -1.96499921e+00 -8.10543190e+00 6.02060397e+00 3.19552464e+01 1.31808333e+01 1.59283713e+01 3.10155646e+01 1.88029015e+01 2.58401145e+01 2.62814205e+00 8.23575610e+00 1.47743264e+01 1.40476584e+01 2.42927090e+00 -4.03623915e+00 1.38368600e+01 7.12365469e-01 -3.93053992e+00 8.38815351e+00 -9.64007109e+00 -1.10395435e+01 1.78408086e+00 1.60459640e+01 -1.54082968e+00 1.72591900e+00 1.96848032e+01 2.74403421e+00 1.40063789e+00 -6.35011362e-02 4.10513359e+00 7.06231556e+00 -3.03910154e+00 -8.79802659e+00 -5.32460921e+00 -7.71104356e+00 4.85892369e+00 -1.90913808e+00 5.10640161e+00 -4.51969578e+00 1.21257258e+00 2.71610198e+00 -1.81451486e+00 1.08749210e+01 -9.35857565e+00 1.03620939e+01 -5.96524291e+00 1.48942356e+01 2.13224363e+00 7.83814758e+00 4.51676839e+00 -1.48741979e+00 -8.38739343e+00 7.92962496e-01 1.58995184e+00 2.97438183e-01 4.21035987e+00 6.39792705e+00 -2.64874214e+00 6.80454360e+00 -4.00664618e+00 2.50819358e+00 -6.26995142e+00 -1.07052060e+00 7.27909468e-01 -6.63134935e-02 -3.92831692e+00 -3.58340902e+00 1.53464582e+00 -7.37345149e-01 1.08945592e+00 8.58031915e+00 1.56167101e+00 3.46387937e+00 1.31163848e+00 2.65754303e+00 -4.46342811e-01 -6.65681830e+00 -3.09667208e+00 4.34191688e+00 2.64449813e+00 7.56880801e-01 2.61336734e-01 -7.11178575e-01 1.19944443e-01 -4.63815258e+00 6.54110512e-01 3.42657908e+00 1.86989870e-01 2.23466382e+00 -1.71003428e+00 -2.11933988e+00 4.02580740e+00 -2.80462679e+00 6.58386860e-01 -2.31639301e+00 -6.88144347e+00 1.56929194e+00 -7.92498974e-01 2.98531553e-01 -1.40331206e+00 -2.88055610e+00 3.56640076e+00 -2.07260024e+00]]
******** n_components=118 ********
Explained Variance Ratio per Component: [0.09722155 0.03288487 0.02784989 0.01907246 0.01673716 0.01475282 0.01010298 0.00927506 0.0078448 0.00695542 0.00637769 0.00568514 0.00550618 0.00521396 0.00476945 0.00443479 0.00425115 0.00404316 0.00396367 0.00368677 0.00345759 0.00327868 0.00311001 0.00305991 0.00290327 0.00271685 0.00264588 0.00259079 0.00251239 0.00242961 0.00233461 0.00227577 0.00215714 0.0021465 0.00208895 0.00202038 0.00197378 0.00191173 0.00187722 0.00184379 0.00182895 0.00178563 0.00174023 0.00169143 0.00167805 0.00163801 0.00160134 0.00156888 0.0015578 0.00153365 0.00150325 0.00146894 0.00146148 0.00143994 0.00141402 0.00140793 0.00139187 0.00137342 0.00136212 0.00135571 0.00134182 0.00130961 0.00130304 0.00129116 0.00126156 0.00125637 0.00124488 0.00123039 0.00122242 0.00121478 0.0011991 0.00118707 0.0011856 0.00116392 0.00115897 0.00114489 0.00113686 0.00113052 0.00112848 0.00111993 0.00109964 0.00108828 0.00108241 0.00107838 0.00106669 0.00105812 0.00105317 0.00104731 0.00104163 0.00103648 0.00103005 0.00101276 0.00101136 0.00100756 0.00099559 0.00099302 0.00098469 0.000975 0.0009703 0.00097072 0.00096064 0.00095237 0.00095035 0.00094241 0.00092696 0.00092166 0.00091142 0.00090034 0.00090091 0.0008993 0.00089233 0.00088804 0.00087818 0.00087755 0.0008736 0.00086624 0.00086409 0.0008557 ]
Explained Variance per Component: [237.2527247 80.24995646 67.96295031 46.54311306 40.84420643 36.0017507 24.65460654 22.63420233 19.14389615 16.97351609 15.56367997 13.87362396 13.43690363 12.72377825 11.63902516 10.82235154 10.37420573 9.8666447 9.67265813 8.99693957 8.43765573 8.0010657 7.58945231 7.46719349 7.08493835 6.63001611 6.45681386 6.32239132 6.13106662 5.92905686 5.6972266 5.55363458 5.26412761 5.23818115 5.09773191 4.93039198 4.81667379 4.66524737 4.58104771 4.49944817 4.46323394 4.35753871 4.24674377 4.12765979 4.09500332 3.997287 3.90780924 3.82859101 3.80155074 3.74262066 3.66842116 3.58469416 3.56648588 3.51392711 3.45067956 3.43581973 3.39661564 3.35159685 3.32401757 3.30839043 3.27449393 3.19587736 3.17984224 3.15086445 3.07863285 3.06596691 3.03792742 3.00255016 2.98310538 2.96447148 2.92620617 2.89683266 2.89326257 2.84034194 2.8282652 2.79390053 2.77431791 2.75884381 2.75386039 2.73300916 2.68349007 2.65577356 2.64143116 2.63161579 2.60308173 2.5821564 2.57009487 2.5557786 2.54192682 2.52936279 2.51366064 2.47147208 2.46804536 2.45877331 2.42957824 2.42329837 2.40296619 2.3793323 2.36786191 2.36886746 2.34427652 2.32410558 2.31915793 2.29979354 2.2620805 2.24916641 2.22416359 2.19711652 2.19852153 2.19457763 2.17758648 2.16711552 2.143048 2.14150905 2.13188151 2.11390906 2.10867191 2.08819037]
Total Variance Explained by all components: 0.4318631193029966
Shape of the reduced data: (56318, 118)
Reduced data sample: [[ 1.33003764e+02 3.11663647e+01 1.23427211e+02 1.54590266e+01 -2.34470217e+01 -6.45674035e+00 1.28157029e+01 4.84912843e+01 2.18300067e+00 2.26882456e+00 3.27898950e+01 4.25132254e+01 2.81328138e+01 3.21980830e+01 1.59419939e+01 2.61142309e+01 9.76962641e+00 -6.71782177e+00 6.66853306e+00 1.29662661e+00 5.86129759e-01 -7.83311983e+00 1.21234282e+01 -7.41916327e+00 1.77308416e+01 -4.35154769e+00 2.03576545e+01 6.98861510e+00 1.29710735e+01 6.95339230e+00 1.57119615e+01 1.44067178e+01 1.17915831e+01 -8.76476307e+00 -7.17742748e+00 1.72923024e+01 -1.05734674e+01 8.62699810e+00 -6.19316695e+00 -8.44733906e+00 1.11736580e+00 2.76593286e+00 -7.15912376e+00 1.98042678e+00 2.87613308e+00 5.23246347e+00 -2.59139534e+00 1.21169174e+01 7.24184138e+00 1.34113001e+01 1.10718223e+00 1.42523850e+01 6.84768212e+00 5.61528742e+00 3.27392604e+00 2.07686630e+00 2.77045540e+00 -3.20725664e+00 4.78832125e+00 -4.86514662e+00 -4.07578412e+00 2.11402791e+00 1.49433523e+00 1.09873372e+00 -5.30248374e+00 -1.33471434e-02 -8.48177509e+00 1.95783193e+00 1.11058577e+01 -5.95348915e+00 -3.72471821e+00 1.50175334e+00 2.75914896e+00 5.04292185e+00 1.54106700e+01 3.98488162e+00 5.01706155e-02 3.65542559e+00 5.01893723e+00 -1.18933794e+00 2.27790788e+00 8.54062305e+00 -7.49178230e+00 -8.52835766e-01 3.76923522e+00 5.70233232e+00 -1.80603983e+00 -2.88074865e+00 -4.99463779e+00 1.55146316e+00 2.28092955e+00 -3.15522854e+00 -4.92663505e-01 -4.14421144e-01 -6.13554851e+00 1.53123040e+00 -7.84620966e-01 2.32652083e-01 -1.01623832e+00 -3.90778860e+00 -1.62060269e-01 8.53250687e-01 -1.49071437e-01 4.19103611e+00 1.76159222e+00 1.48059718e+00 1.91055973e+00 4.82674589e+00 1.93610419e+00 -3.82866953e+00 1.31914911e-01 -4.26723218e+00 4.25185152e+00 -5.11031519e+00 5.39254886e+00 2.30389550e+00 -2.01318661e+00 3.23390947e+00] [ 1.21155080e+02 5.69579855e+01 8.59574217e+01 -8.90387652e+00 5.83568903e+00 -1.96499966e+00 -8.10543359e+00 6.02060854e+00 3.19552428e+01 1.31808239e+01 1.59283782e+01 3.10154272e+01 1.88029127e+01 2.58401742e+01 2.62823376e+00 8.23526127e+00 1.47736121e+01 1.40478764e+01 2.42969136e+00 -4.03637690e+00 1.38388643e+01 7.16410669e-01 -3.92478695e+00 8.39199139e+00 -9.63685554e+00 -1.10469640e+01 1.77887550e+00 1.60601000e+01 -1.53062787e+00 1.74145639e+00 1.97100227e+01 2.77303501e+00 1.41959807e+00 -4.63422755e-02 4.04703402e+00 7.13970543e+00 -3.01588991e+00 -8.93694713e+00 -5.17845330e+00 -7.72626898e+00 4.91173558e+00 1.96453763e+00 5.04844609e+00 -4.56241350e+00 1.23450622e+00 2.54142583e+00 -1.59354909e+00 1.08262417e+01 -9.87155924e+00 1.01784343e+01 6.05217145e+00 1.45736285e+01 1.08091436e+00 9.05125651e+00 3.33874392e+00 1.41912518e+00 -7.30118164e+00 6.18464937e-01 1.83018505e+00 -3.49671523e-02 3.25626340e+00 5.95198587e+00 -2.71790286e+00 8.30430415e+00 -1.51161731e+00 -5.01144626e+00 -6.66668681e+00 2.06074238e+00 2.00854521e-01 1.52330954e+00 -4.70610601e+00 -7.50649682e-01 -4.35456607e+00 3.94120244e+00 4.10668072e+00 2.94935660e+00 -2.73459845e+00 6.31392584e+00 -2.42962917e+00 -8.35469434e-01 1.96645430e+00 3.84197959e+00 -3.01023616e+00 6.03440573e+00 -1.52715223e+00 -4.43906252e+00 -4.12426967e+00 7.73597819e-01 5.09922200e+00 1.98437591e+00 8.84536683e-01 3.02304613e+00 3.29809036e+00 -6.09133972e+00 -2.86443891e+00 5.18321085e+00 2.59769582e+00 -3.42282320e+00 -3.78104054e+00 -7.59444290e-01 2.71983866e-02 -1.21731684e+00 3.12306348e+00 2.27768743e+00 -4.29259060e-01 2.31585876e+00 1.86535032e+00 1.42165673e+00 -1.59171560e+00 -2.58450336e+00 3.62454907e+00 -2.08500146e+00 5.10385236e-01 2.37909524e+00 3.32376343e+00 -5.32475329e-01 1.09409269e+00 5.00862293e-01]]
******** n_components=128 ********
Explained Variance Ratio per Component: [0.09722155 0.03288487 0.02784989 0.01907246 0.01673716 0.01475282 0.01010298 0.00927506 0.0078448 0.00695542 0.00637769 0.00568514 0.00550618 0.00521396 0.00476945 0.00443479 0.00425115 0.00404316 0.00396367 0.00368677 0.00345759 0.00327868 0.00311001 0.00305991 0.00290327 0.00271685 0.00264588 0.00259079 0.00251239 0.00242961 0.00233462 0.00227577 0.00215715 0.00214652 0.00208894 0.0020204 0.00197379 0.00191174 0.00187723 0.00184386 0.00182894 0.00178568 0.00174035 0.0016913 0.00167818 0.00163804 0.00160223 0.00156915 0.0015586 0.00153372 0.00150323 0.00146947 0.0014617 0.00143975 0.00141352 0.00140829 0.00139268 0.00137405 0.00136291 0.00135512 0.00134179 0.00131026 0.00130395 0.00129505 0.0012586 0.00125704 0.00124736 0.00124096 0.00122507 0.00121703 0.0012035 0.00118921 0.00118753 0.00116436 0.00115874 0.0011507 0.00114294 0.00113362 0.00112702 0.00112165 0.00110154 0.00109646 0.00108958 0.00107501 0.0010747 0.00105908 0.00105297 0.00105084 0.001044 0.00104074 0.00103344 0.00102147 0.00101757 0.00101313 0.0010086 0.00099358 0.0009908 0.0009785 0.00097982 0.00097184 0.00096717 0.00096153 0.00095386 0.00095367 0.00094409 0.00093642 0.00092623 0.00092309 0.00091859 0.00091333 0.00090997 0.00089948 0.00089577 0.00089083 0.0008861 0.00087987 0.00087711 0.00087182 0.00086828 0.00086218 0.00085685 0.00084936 0.00084754 0.00083378 0.00083362 0.00083288 0.00082376 0.00082384]
Explained Variance per Component: [237.2527247 80.24995646 67.96295031 46.54311306 40.84420643 36.0017507 24.65460655 22.63420233 19.14389617 16.97351607 15.56367996 13.87362371 13.43690363 12.72377829 11.63902524 10.82235201 10.37420548 9.86664416 9.67265782 8.99693989 8.4376566 8.00106831 7.58945294 7.46718868 7.08493562 6.63001238 6.45682463 6.32238988 6.13107141 5.92905407 5.69723275 5.55362692 5.26414912 5.23820899 5.09770349 4.93044066 4.81670901 4.6652819 4.58106587 4.49961776 4.46320887 4.35765523 4.24702419 4.12732971 4.09532291 3.9973575 3.90998148 3.82923935 3.80349289 3.74279044 3.66838852 3.585986 3.56702179 3.51346026 3.44944811 3.43668516 3.3985882 3.35312833 3.32596251 3.30694281 3.27439993 3.19746304 3.18207214 3.16035242 3.07140348 3.0675889 3.043979 3.02834911 2.98956819 2.96994717 2.93693287 2.90205466 2.89796539 2.84141428 2.82771563 2.80809429 2.78915417 2.76641007 2.75030324 2.73719793 2.68811436 2.67573504 2.65893156 2.62338602 2.62262018 2.58451503 2.56959949 2.56440881 2.54771375 2.53975771 2.52192319 2.4927198 2.48321249 2.47237959 2.46131156 2.42467289 2.41786997 2.38785919 2.39107329 2.37161723 2.36022381 2.34645215 2.32772289 2.32728187 2.30388938 2.28517917 2.26029864 2.2526542 2.24166626 2.22883209 2.22063639 2.19503797 2.18596567 2.17391716 2.16236672 2.14718252 2.14044973 2.1275353 2.11889245 2.10401598 2.09100822 2.0727187 2.06827651 2.03470801 2.03431575 2.03250511 2.01024734 2.0104385 ]
Total Variance Explained by all components: 0.4406819452826911
Shape of the reduced data: (56318, 128)
Reduced data sample: [[ 1.33003764e+02 3.11663647e+01 1.23427211e+02 1.54590266e+01 -2.34470219e+01 -6.45674028e+00 1.28157034e+01 4.84912811e+01 2.18298958e+00 2.26880621e+00 3.27898751e+01 4.25132413e+01 2.81329301e+01 3.21982234e+01 1.59424536e+01 2.61140323e+01 9.76963729e+00 -6.71702592e+00 6.66833703e+00 1.29615812e+00 5.85739396e-01 -7.83354066e+00 1.21214566e+01 -7.41504820e+00 1.77301943e+01 -4.35216285e+00 2.03525233e+01 6.98930083e+00 1.29639416e+01 6.95707342e+00 1.56759485e+01 1.44085248e+01 1.17676424e+01 -8.78707827e+00 -7.16502286e+00 1.73127516e+01 -1.06101523e+01 8.57627914e+00 -6.17010503e+00 -8.38926908e+00 1.22170388e+00 2.73484589e+00 -7.19538523e+00 2.02739651e+00 3.11170798e+00 5.10849766e+00 -2.55387926e+00 1.18313923e+01 6.90735214e+00 1.37115461e+01 4.42644936e-01 1.46812593e+01 6.48925818e+00 5.93669499e+00 3.79731036e+00 1.16549433e+00 3.22190214e+00 -2.95466275e+00 4.51450643e+00 4.63638680e+00 -3.85896941e+00 1.60432330e+00 -2.61842922e+00 1.64800692e+00 -3.06310181e+00 -2.43864316e+00 -6.00960928e+00 6.03779272e+00 -1.05782972e+01 -8.13273219e+00 -3.18261263e+00 4.59592034e+00 1.16563861e+00 -4.26389128e+00 1.27094301e+01 7.53584860e+00 -2.27241132e+00 5.71293831e+00 -8.67512648e-01 -3.83082722e+00 7.03388412e+00 -3.78405100e+00 3.57136939e+00 9.04910802e+00 5.38475692e+00 -3.85216067e+00 1.19114135e+00 -2.44895326e+00 -4.71670058e+00 -3.42787315e+00 1.20814756e+00 1.67031578e+00 3.67651266e+00 -4.53446386e+00 2.10954444e+00 -4.66824348e+00 -3.46967725e+00 3.53093883e-01 1.91303710e+00 3.88005811e+00 1.99875352e+00 5.36125085e-01 -1.13964667e+00 2.42423120e+00 5.01920330e+00 -8.70203517e-01 -2.15357447e+00 -6.62252015e+00 -1.87926874e-01 -3.70186936e+00 4.91322455e+00 -5.15735705e+00 1.88809587e+00 -3.67789192e-01 -4.79776685e+00 1.44633275e+00 1.36058842e+00 -2.09097094e+00 -3.81197379e+00 3.90179192e+00 -5.89608308e+00 -4.09838164e+00 -4.17491070e+00 -1.88326678e+00 5.24118553e+00 2.71168689e+00 6.53732203e-01 -7.99466197e-01] [ 1.21155080e+02 5.69579855e+01 8.59574217e+01 -8.90387651e+00 5.83568903e+00 -1.96499952e+00 -8.10543333e+00 6.02061095e+00 3.19552480e+01 1.31808171e+01 1.59283829e+01 3.10155021e+01 1.88029360e+01 2.58403240e+01 2.62853287e+00 8.23549157e+00 1.47741368e+01 1.40477119e+01 2.42880440e+00 -4.03658770e+00 1.38388099e+01 7.13631918e-01 -3.92983335e+00 8.39131178e+00 -9.63542764e+00 -1.10466993e+01 1.78706152e+00 1.60607819e+01 -1.53157865e+00 1.73953319e+00 1.96879270e+01 2.78259764e+00 1.42685769e+00 -3.66656405e-02 4.05493461e+00 7.17768837e+00 -3.01621742e+00 -8.92898575e+00 -5.12997257e+00 -7.66603217e+00 5.11103899e+00 1.88402017e+00 5.00820553e+00 -4.56679552e+00 1.41012062e+00 2.54421578e+00 -1.37494546e+00 1.03857273e+01 -1.00647160e+01 1.04605744e+01 5.85518295e+00 1.48401222e+01 1.24984708e+00 8.88604272e+00 3.80846018e+00 7.23473603e-01 -7.67709690e+00 4.89647106e-02 1.79933950e+00 8.88635474e-02 3.37461789e+00 7.17431872e+00 -1.89770201e+00 7.60567814e+00 2.07733899e+00 -6.62384501e+00 -4.66462161e+00 3.61143785e+00 1.39071518e-01 6.20321344e-01 -3.69962405e+00 -1.89041280e+00 -1.94428892e+00 -4.54686658e-02 6.05557267e+00 5.61405165e+00 2.18008828e+00 -6.71512232e-01 -7.18238360e-01 -4.97452373e+00 4.09302925e+00 -4.51188421e+00 1.27669601e+00 -1.42457275e+00 -2.63466816e+00 -1.12804311e+00 -5.19474073e+00 -2.71797039e+00 7.67634157e-01 1.48209576e+00 -3.05209200e+00 2.65778710e+00 -1.44079108e+00 2.94313286e+00 -1.13287828e+00 4.23374595e+00 8.97683534e-01 -2.70197214e+00 4.33483064e+00 3.08018548e+00 -2.64306307e+00 -4.03057597e+00 4.64447125e-01 6.53081662e+00 1.37858271e+00 -1.94665713e+00 -7.83043102e-01 -8.45069264e+00 -4.49058883e+00 4.85272778e+00 4.24608542e-03 -2.43433563e+00 -1.29521418e+00 4.78731668e-01 2.30975503e+00 1.60075029e+00 2.41509841e+00 2.60933029e-01 -1.50303943e+00 4.08292280e+00 -2.10696375e+00 3.50047695e-01 1.30582359e+00 5.61664720e+00 4.99361090e+00 2.66198574e+00 -2.41830658e+00 1.11205787e+00]]
******** n_components=138 ********
Explained Variance Ratio per Component: [0.09722155 0.03288487 0.02784989 0.01907246 0.01673716 0.01475282 0.01010298 0.00927506 0.0078448 0.00695542 0.00637769 0.00568514 0.00550618 0.00521396 0.00476945 0.00443479 0.00425115 0.00404316 0.00396367 0.00368677 0.00345759 0.00327868 0.00311001 0.00305991 0.00290327 0.00271685 0.00264588 0.0025908 0.0025124 0.00242962 0.00233462 0.00227578 0.00215715 0.00214653 0.00208896 0.00202041 0.0019738 0.00191178 0.00187727 0.00184384 0.001829 0.00178587 0.00174053 0.00169152 0.00167825 0.00163807 0.00160209 0.00156938 0.0015587 0.00153396 0.00150395 0.00146964 0.00146196 0.00144035 0.00141493 0.00140797 0.00139346 0.00137465 0.00136337 0.00135648 0.00134137 0.0013114 0.00130669 0.00129696 0.00126346 0.00125765 0.00124823 0.00124145 0.00122602 0.0012208 0.00120721 0.00119097 0.00118903 0.00116521 0.00116237 0.00115023 0.0011458 0.00113622 0.00113206 0.00112812 0.00110509 0.00109778 0.00109242 0.00108273 0.00107779 0.00106559 0.00106124 0.00105317 0.00105196 0.00104272 0.00103401 0.00102966 0.0010264 0.00100956 0.00101023 0.00100496 0.00099568 0.00099397 0.00098315 0.00097748 0.00097019 0.00096788 0.00096502 0.00096485 0.0009524 0.00094185 0.00093835 0.00093314 0.00093136 0.00092631 0.00091763 0.00090876 0.00090343 0.00090029 0.00089966 0.00089087 0.00088818 0.00088467 0.00088002 0.00087758 0.00087554 0.00087339 0.00086333 0.00085867 0.00085648 0.00084981 0.00084841 0.00084404 0.00083611 0.00082881 0.00082215 0.00081941 0.00081273 0.00080987 0.00080924 0.00079539 0.0007944 0.00079209]
Explained Variance per Component: [237.2527247 80.24995646 67.96295031 46.54311306 40.84420643 36.0017507 24.65460654 22.63420233 19.14389613 16.97351607 15.56368002 13.87362375 13.43690363 12.72377828 11.6390253 10.82235183 10.37420577 9.86664451 9.67265825 8.99693964 8.43765708 8.00106781 7.5894525 7.46718797 7.08493899 6.63001648 6.45682989 6.32239854 6.13107923 5.92906473 5.69724729 5.55364713 5.26416875 5.23823456 5.09775669 4.93047497 4.81672842 4.66538264 4.58115321 4.49958352 4.46337317 4.35811981 4.24747477 4.12786196 4.09549291 3.99743225 3.90963186 3.82981727 3.80375449 3.74335894 3.6701314 3.58641027 3.56765722 3.51492967 3.4528964 3.43590074 3.40050335 3.35459181 3.32707896 3.31026824 3.2733949 3.20026127 3.18875033 3.1650004 3.08327219 3.06907342 3.04608827 3.02955051 2.99189188 2.97915995 2.94598051 2.90635926 2.90161706 2.84349051 2.83656407 2.80694323 2.79614056 2.77275377 2.76260395 2.75298728 2.69679184 2.67895611 2.6658721 2.64222185 2.63016736 2.60039562 2.58976778 2.57007657 2.56713096 2.54457404 2.5233333 2.51270985 2.50474331 2.46367101 2.46529684 2.45242603 2.4297961 2.42561255 2.39920677 2.38537954 2.36758397 2.36195763 2.35496156 2.35455447 2.32417031 2.29842282 2.28988146 2.27716898 2.27282747 2.26049848 2.23932843 2.21766723 2.20466882 2.19699839 2.19547886 2.17401637 2.16745295 2.15889233 2.14753996 2.14157821 2.13661793 2.13136396 2.1068039 2.09544806 2.09010318 2.07381665 2.07040628 2.05974132 2.04039647 2.0225804 2.0063137 1.99962707 1.98333688 1.97635424 1.97482097 1.94100662 1.93859461 1.93297037]
Total Variance Explained by all components: 0.4493274172605292
Shape of the reduced data: (56318, 138)
Reduced data sample: [[ 1.33003764e+02 3.11663647e+01 1.23427211e+02 1.54590266e+01 -2.34470218e+01 -6.45674034e+00 1.28157032e+01 4.84912845e+01 2.18300399e+00 2.26882198e+00 3.27899140e+01 4.25132782e+01 2.81328065e+01 3.21982297e+01 1.59423057e+01 2.61139920e+01 9.76980337e+00 -6.71678218e+00 6.66795986e+00 1.29625312e+00 5.85094481e-01 -7.83572285e+00 1.21217533e+01 -7.41261047e+00 1.77337915e+01 -4.34966275e+00 2.03614902e+01 6.98690835e+00 1.29742687e+01 6.94943028e+00 1.56935303e+01 1.43921530e+01 1.17680259e+01 -8.78712938e+00 -7.19922247e+00 1.73478164e+01 -1.05946832e+01 -8.54091606e+00 -6.19568493e+00 -8.39035485e+00 1.09834260e+00 2.73099209e+00 -7.08490979e+00 1.97293447e+00 3.03181488e+00 5.10288429e+00 -2.68271477e+00 1.19430594e+01 7.03484525e+00 1.31804333e+01 6.23829950e-01 1.49020570e+01 6.49696527e+00 5.79646007e+00 3.90130898e+00 -1.89186212e+00 2.95159316e+00 -2.63211967e+00 3.36914841e+00 6.14769947e+00 -4.06608461e+00 3.09035166e+00 1.82369777e+00 2.33979033e+00 -4.17834122e+00 -1.32151747e+00 -6.55560608e+00 -2.66257534e+00 -1.23181641e+01 5.43621806e+00 -2.94833047e+00 3.33716515e+00 -2.20009863e+00 -1.12276994e+01 1.13487494e+01 6.87080952e+00 2.43594023e+00 5.32944231e+00 2.59458906e+00 3.48478463e+00 -1.27146379e+00 7.91563974e+00 -7.55535980e-01 -1.44289916e+00 -7.51121560e+00 1.02557700e+00 -5.15064258e+00 -1.39044334e+00 -3.50127285e+00 1.68253420e+00 3.34304976e+00 3.15848396e-01 3.46842683e+00 -2.96503348e+00 -8.95545172e-01 2.03605112e+00 -3.55787035e-01 9.88889806e-01 -3.64872840e+00 -3.63343177e+00 -6.82587483e-01 -3.75820682e+00 1.50367506e-01 -7.02512152e-01 -2.68115486e+00 4.75271383e+00 6.33214599e+00 -2.74776585e+00 -7.19864226e-01 6.76009254e-01 -1.12035816e+00 9.97585408e+00 2.67661884e+00 -2.24864629e+00 4.67249835e+00 2.35726820e+00 1.04291857e+00 -8.34675139e-01 -4.33319242e+00 3.79312951e+00 5.25389374e-01 -2.16004292e-01 1.16732373e+00 -1.00910273e+00 1.38212324e+00 1.53984228e+00 4.60134763e-01 1.55837384e+00 5.30945724e+00 3.83208812e+00 -3.33133829e-02 7.04728397e+00 -2.17519838e+00 6.57631899e-02 3.34911554e+00 -4.77366374e+00 2.22955882e+00 -2.41131079e+00] [ 1.21155080e+02 5.69579855e+01 8.59574217e+01 -8.90387648e+00 5.83568900e+00 -1.96499949e+00 -8.10543384e+00 6.02060928e+00 3.19552444e+01 1.31808254e+01 1.59283873e+01 3.10154651e+01 1.88027778e+01 2.58403124e+01 2.62826215e+00 8.23542693e+00 1.47740144e+01 1.40477585e+01 2.42904579e+00 -4.03674602e+00 1.38374695e+01 7.13853736e-01 -3.92898093e+00 8.39619411e+00 -9.63460884e+00 -1.10448007e+01 1.78083534e+00 1.60590729e+01 -1.53036151e+00 1.74104411e+00 1.96925477e+01 2.76890768e+00 1.40218849e+00 -4.68297819e-02 4.04739331e+00 7.16478575e+00 -2.97877322e+00 8.97028059e+00 -5.15740602e+00 -7.70992771e+00 4.99441323e+00 1.86628770e+00 5.00946628e+00 -4.68720357e+00 1.36217500e+00 2.45756563e+00 -1.61805194e+00 1.05122578e+01 -9.75313676e+00 1.04049566e+01 5.81463086e+00 1.47587050e+01 1.03745961e+00 8.72922199e+00 4.28917067e+00 -9.93370087e-01 -7.44139341e+00 6.58413046e-01 2.33134417e+00 -7.91869649e-02 2.87271847e+00 5.38412826e+00 -2.64958461e+00 8.32867779e+00 -2.29284946e+00 5.59422767e+00 -6.72056205e+00 -1.49899758e+00 1.41961380e+00 5.34642585e-03 -3.72711202e+00 -2.91780176e+00 7.91193395e-01 -2.40421596e+00 5.18088128e+00 2.48257798e+00 7.06513095e+00 -1.07903920e+00 1.66972042e+00 3.64407956e+00 1.79319116e+00 6.23103749e+00 -4.14485452e-01 1.49018102e+00 5.25858374e+00 9.50102583e-03 6.90986153e-01 -3.36395997e+00 4.37609792e+00 -2.54459424e+00 2.66549863e+00 3.42770913e+00 2.88629115e+00 4.21109210e+00 4.51663476e-02 -3.08647998e-01 -1.65368269e-02 -5.37997134e-01 -1.96106627e+00 5.04807412e+00 2.42097076e+00 8.58636567e-01 -3.68470964e+00 -6.94970009e+00 2.34621605e+00 3.67986675e-01 6.26526097e+00 -2.11244220e+00 9.67146969e-01 4.00301472e+00 6.03792199e-01 -4.76592998e-01 1.77299683e+00 4.18998434e+00 -1.36890488e+00 2.55234290e+00 -9.89569391e-01 9.08833789e-01 -9.30134393e-01 1.04770884e+00 3.54327049e+00 -2.84059201e-01 1.36499272e+00 -2.80513656e-01 2.69645794e+00 -4.02745034e+00 -7.75009515e-01 -1.63280442e+00 2.00291244e+00 4.66345165e+00 -2.32433335e+00 1.08107955e+00 -8.57033285e-02 -3.39287057e+00 -2.02288751e+00 -4.63060722e+00 -7.04892480e-01 3.56802427e+00]]
******** n_components=148 ********
Explained Variance Ratio per Component: [0.09722155 0.03288487 0.02784989 0.01907246 0.01673716 0.01475282 0.01010298 0.00927506 0.0078448 0.00695542 0.00637769 0.00568514 0.00550618 0.00521396 0.00476945 0.00443479 0.00425115 0.00404316 0.00396367 0.00368677 0.00345759 0.00327868 0.00311001 0.00305991 0.00290327 0.00271685 0.00264588 0.00259079 0.0025124 0.00242961 0.00233462 0.00227578 0.00215715 0.00214652 0.00208896 0.0020204 0.00197381 0.0019118 0.00187727 0.00184389 0.00182903 0.00178583 0.00174055 0.00169156 0.00167826 0.00163829 0.0016023 0.00156953 0.00155863 0.00153429 0.00150422 0.00146964 0.00146244 0.00144059 0.00141541 0.00140842 0.00139406 0.00137398 0.00136378 0.00135762 0.00134286 0.00131147 0.00130776 0.0012972 0.00126373 0.00125844 0.00124794 0.00124277 0.00122645 0.00122108 0.00120608 0.00119169 0.00118944 0.00116828 0.00116415 0.00115609 0.00114682 0.00113491 0.00113409 0.00112721 0.0011097 0.00110066 0.00109639 0.00108645 0.00107875 0.00106683 0.00106324 0.0010566 0.00105383 0.00104563 0.00104102 0.00103445 0.00102329 0.00101691 0.00101815 0.00101268 0.00100395 0.00099704 0.00098853 0.00098686 0.00098071 0.00097813 0.00096905 0.00096173 0.00095998 0.00095224 0.00094617 0.00093916 0.00093397 0.00093285 0.00092343 0.00092009 0.00091592 0.00091324 0.00090589 0.0008992 0.00089873 0.0008935 0.00088805 0.00088438 0.00088216 0.00087752 0.00087629 0.00086635 0.00086408 0.00085937 0.00085375 0.00085131 0.00084351 0.00084315 0.00083511 0.00083228 0.00082587 0.00082547 0.00081797 0.00081588 0.00081135 0.00080422 0.00080204 0.00079987 0.00079426 0.00078731 0.00078226 0.00078333 0.00077738 0.00076816 0.00076821 0.00075807]
Explained Variance per Component: [237.2527247 80.24995646 67.96295031 46.54311306 40.84420643 36.0017507 24.65460654 22.63420233 19.14389614 16.97351607 15.56368001 13.87362379 13.43690363 12.72377828 11.63902532 10.82235197 10.37420565 9.86664406 9.67265798 8.99693923 8.43765673 8.00106833 7.58945248 7.46718937 7.08494047 6.63001633 6.45683114 6.32239493 6.13107654 5.92906359 5.69725098 5.55365999 5.26416869 5.23822525 5.09776329 4.93044903 4.81673691 4.66542677 4.58116007 4.49969611 4.46342576 4.35800931 4.24752318 4.12796646 4.09550717 3.9979672 3.91014837 3.83017134 3.80356511 3.74416452 3.67080384 3.58640446 3.56884565 3.51551289 3.45407602 3.43700599 3.40197941 3.35295833 3.32807476 3.31304421 3.2770163 3.20042336 3.19137472 3.16559959 3.08392335 3.07099773 3.04539588 3.03276322 2.99294149 2.97983022 2.94323641 2.90812617 2.90263402 2.85099685 2.84090207 2.82123802 2.79860957 2.76956564 2.76756622 2.75076248 2.70803431 2.68597153 2.67555898 2.65130273 2.6325026 2.60342704 2.59466596 2.57844974 2.57169057 2.55168045 2.54043711 2.52440787 2.49715525 2.48158915 2.48463311 2.47126598 2.44997163 2.43311065 2.4123507 2.40826269 2.39325742 2.38696705 2.36481347 2.34693867 2.34266702 2.32377745 2.30897805 2.29187055 2.27920404 2.27645778 2.2534764 2.24531706 2.23514013 2.22859858 2.21067909 2.19433894 2.19319641 2.18043608 2.16713094 2.15816997 2.15276471 2.14143277 2.13844642 2.11418931 2.1086473 2.09714915 2.08343622 2.07748367 2.05844646 2.05756117 2.03794599 2.03104899 2.01539596 2.01442324 1.99611147 1.99101102 1.97995423 1.96255896 1.95723075 1.95194973 1.93824636 1.92129353 1.908976 1.91158137 1.89705798 1.87457631 1.87468402 1.84994565]
Total Variance Explained by all components: 0.4576049822021624
Shape of the reduced data: (56318, 148)
Reduced data sample: [[ 1.33003764e+02 3.11663647e+01 1.23427211e+02 1.54590266e+01 -2.34470218e+01 -6.45674037e+00 1.28157026e+01 4.84912815e+01 2.18300304e+00 2.26881719e+00 3.27898771e+01 4.25133142e+01 2.81327710e+01 3.21981439e+01 1.59422979e+01 2.61141490e+01 9.76991255e+00 -6.71704240e+00 6.66844012e+00 1.29681494e+00 5.84256306e-01 -7.83346938e+00 1.21228853e+01 -7.41565921e+00 1.77309191e+01 -4.35092874e+00 2.03564048e+01 6.98563042e+00 1.29686959e+01 6.96190308e+00 1.56976959e+01 1.44100342e+01 1.17960485e+01 -8.78713746e+00 -7.21225817e+00 1.73323683e+01 -1.05578737e+01 8.59186730e+00 -6.23742589e+00 -8.40782911e+00 1.06972936e+00 -2.71437510e+00 -7.23572668e+00 1.88739989e+00 3.02450067e+00 5.31348841e+00 -2.75667130e+00 1.18315552e+01 6.68536990e+00 1.33467362e+01 8.30361275e-01 1.49020822e+01 6.62619799e+00 5.60272109e+00 3.51095420e+00 1.96174238e+00 3.23466954e+00 -2.95183128e+00 4.28353496e+00 5.49142634e+00 -4.11617832e+00 -3.16330308e+00 1.28463490e+00 1.71917619e+00 -4.80106075e+00 1.02979090e+00 -7.31453745e+00 -3.72140151e+00 -8.83719300e+00 -9.22566489e+00 -3.76634006e+00 3.04271249e+00 6.07897705e-01 4.03036773e+00 1.48607520e+01 -8.18677560e+00 -9.82089403e-01 4.60647918e+00 2.49196896e+00 2.75281126e+00 -3.39066111e-01 6.67800175e+00 -4.36770820e+00 -4.13080474e-01 -7.51928575e+00 -3.83810728e+00 -3.94873717e+00 4.61170207e-01 -5.98961557e+00 4.35205957e+00 -1.21042530e+00 1.24171255e+00 -1.19489490e+00 -7.72976640e+00 1.08989666e+00 -5.95050132e-01 1.89156814e+00 -2.33987044e+00 -1.46731989e+00 2.03699925e+00 4.85951524e-01 8.93009834e-01 -1.38589265e+00 1.21993830e+00 9.14786581e-01 -6.48722593e+00 2.35657118e-01 -8.66390846e-01 9.98434887e-01 -4.96248643e+00 6.81498026e+00 -6.84948398e+00 2.19031328e-01 6.37366091e+00 1.80430282e+00 -5.62809365e+00 1.63262228e-01 3.47467402e+00 1.15860587e+00 3.95991231e+00 -5.56022916e+00 -1.62761708e+00 -6.21199379e-01 -1.42917074e+00 1.46432520e+00 3.98206818e+00 1.64100787e+00 7.53965157e-02 1.58645496e+00 -3.06138629e+00 -6.35329966e+00 5.94956136e+00 3.83149107e+00 1.15166265e+00 9.47001882e-01 -1.79088035e-01 -5.70777520e+00 -2.10943542e+00 3.96810937e+00 -2.89994233e+00 3.81291856e+00 3.03842278e+00 -4.31556754e-01 -3.68970973e+00 -2.06710099e-01 1.08946342e+00 3.13787230e-01 1.88406187e+00] [ 1.21155080e+02 5.69579855e+01 8.59574217e+01 -8.90387651e+00 5.83568896e+00 -1.96499961e+00 -8.10543487e+00 6.02060882e+00 3.19552406e+01 1.31808339e+01 1.59283920e+01 3.10154998e+01 1.88028590e+01 2.58402036e+01 2.62840258e+00 8.23556061e+00 1.47738536e+01 1.40477267e+01 2.42947076e+00 -4.03616936e+00 1.38386476e+01 7.13954360e-01 -3.92717213e+00 8.39246773e+00 -9.63667872e+00 -1.10449719e+01 1.78272809e+00 1.60597717e+01 -1.53078845e+00 1.73845781e+00 1.96886931e+01 2.77718778e+00 1.42841984e+00 -4.26783458e-02 4.05366258e+00 7.16220677e+00 -2.97770025e+00 -8.92915961e+00 -5.21488270e+00 -7.68839391e+00 4.95978046e+00 -1.86771412e+00 4.96044281e+00 -4.65245706e+00 1.26691224e+00 2.64094912e+00 -1.53658427e+00 1.04235261e+01 -9.94311217e+00 1.02064536e+01 5.93484724e+00 1.49447017e+01 9.37164909e-01 8.88476391e+00 3.96570115e+00 1.40510782e+00 -7.47930529e+00 4.35441673e-01 2.05453050e+00 2.42013421e-02 3.07557902e+00 -6.01438048e+00 -2.86474270e+00 8.25345599e+00 -2.51543811e+00 -6.04805416e+00 -5.95363937e+00 -1.83549793e+00 1.20596313e+00 9.85627505e-01 -3.55177364e+00 -3.01403995e+00 -1.24758821e+00 2.60079137e-01 4.21290781e+00 -6.46968355e+00 4.53897395e+00 1.86383040e+00 -3.17459330e+00 3.49329878e+00 1.00165518e+00 7.11275164e+00 -1.49939307e+00 1.64334749e+00 3.20092339e+00 2.05959429e+00 5.98915964e-01 -3.17294046e+00 2.95420537e+00 -1.28314834e+00 6.38630001e-01 6.04111577e+00 7.55293762e-01 2.89710161e+00 -2.47839412e+00 2.26462143e+00 4.52873988e+00 1.86614672e-01 3.72580004e+00 3.49369333e+00 1.34922376e+00 -2.62401633e+00 6.13245419e+00 1.12364535e+00 3.02193664e+00 5.23608604e-01 9.32608813e-01 4.36231080e-01 4.04391942e-01 -6.35225247e+00 5.00609598e+00 4.27012928e-01 1.30247270e+00 2.57632865e+00 -4.67600219e+00 -2.11905188e+00 2.08703511e+00 1.11108938e+00 1.93274349e+00 2.47694415e+00 1.35458938e+00 -1.85759995e+00 4.44637773e-01 -4.66745168e-01 4.06913351e+00 -3.09301713e+00 3.79862622e+00 -8.85983031e-01 -1.11754915e+00 2.42169786e+00 -2.88559009e+00 2.80527462e+00 2.84991464e+00 4.96102663e-01 4.77445573e-01 2.17104552e+00 -3.81716057e+00 -1.30021212e+00 -1.47122611e+00 9.35902323e-01 1.90326302e+00 -1.47778535e-01 5.69688256e-01 -2.47655206e+00 -1.67065031e+00 3.82650046e+00 -2.28048457e-01 2.36566168e+00]]
******** n_components=158 ********
Explained Variance Ratio per Component: [0.09722155 0.03288487 0.02784989 0.01907246 0.01673716 0.01475282 0.01010298 0.00927506 0.0078448 0.00695542 0.00637769 0.00568514 0.00550618 0.00521396 0.00476945 0.00443479 0.00425115 0.00404316 0.00396367 0.00368677 0.00345759 0.00327868 0.00311001 0.00305991 0.00290327 0.00271685 0.00264588 0.0025908 0.0025124 0.00242961 0.00233462 0.00227578 0.00215716 0.00214653 0.00208898 0.00202043 0.00197382 0.00191181 0.00187726 0.00184387 0.00182903 0.00178589 0.00174057 0.00169159 0.0016783 0.0016383 0.00160216 0.00156951 0.00155885 0.00153429 0.00150434 0.00146999 0.00146258 0.00144093 0.00141553 0.00140851 0.00139407 0.00137494 0.00136374 0.00135775 0.00134336 0.00131252 0.00130838 0.00129793 0.00126471 0.0012591 0.00124889 0.00124253 0.00122849 0.00122224 0.00120787 0.00119384 0.00119062 0.00116995 0.00116488 0.00115731 0.00114841 0.00113613 0.00113085 0.00112883 0.00111079 0.00110488 0.00109559 0.00108759 0.00108058 0.00106852 0.00106488 0.00106029 0.00105873 0.0010489 0.001045 0.00103505 0.00102968 0.00101943 0.00101659 0.00100983 0.00100482 0.00099745 0.00099491 0.00098324 0.00098342 0.00097812 0.00096944 0.00096685 0.00095785 0.00095394 0.00094227 0.00093991 0.00093827 0.000936 0.00092925 0.00092284 0.00091937 0.00091298 0.00090835 0.00090725 0.00089895 0.00089524 0.00089239 0.00088929 0.00088598 0.00088159 0.00088259 0.00087564 0.0008685 0.00086827 0.00086707 0.00085935 0.00085346 0.00084809 0.00084446 0.00084205 0.00083872 0.00083534 0.00083208 0.00082362 0.00082105 0.00081876 0.00081625 0.00080886 0.00080368 0.00079944 0.00079519 0.0007897 0.00078662 0.00078569 0.00077644 0.00077511 0.00077055 0.00076781 0.00076463 0.0007544 0.00075484 0.00074984 0.00074691 0.00074365 0.00074107 0.00073871]
Explained Variance per Component: [237.2527247 80.24995646 67.96295031 46.54311306 40.84420643 36.0017507 24.65460654 22.63420233 19.14389614 16.97351607 15.56367999 13.87362386 13.43690363 12.72377828 11.6390253 10.82235192 10.37420572 9.86664408 9.67265803 8.99693939 8.43765707 8.00106861 7.5894531 7.46718956 7.08493998 6.63001861 6.45683025 6.32240024 6.13108177 5.92906317 5.69725634 5.55365515 5.2641917 5.23824559 5.09779208 4.93050688 4.8167707 4.66544579 4.5811415 4.49965896 4.46344024 4.3581563 4.24757262 4.12804293 4.09560914 3.99799521 3.90979531 3.8301206 3.80410551 3.74418052 3.67109831 3.5872725 3.56916816 3.5163557 3.45436598 3.43722793 3.40199911 3.35529736 3.32797569 3.31335969 3.27823194 3.20299474 3.19287077 3.16737018 3.08631511 3.07262666 3.04770739 3.03218361 2.99791219 2.982668 2.94759947 2.91335899 2.90550378 2.85507025 2.84270301 2.82420803 2.80250359 2.77252889 2.75965042 2.75470937 2.71070013 2.6962684 2.67360024 2.65408439 2.6369672 2.60753744 2.59866191 2.58745513 2.58364139 2.55966208 2.55013769 2.52586879 2.512758 2.48773408 2.48081268 2.46433102 2.45210457 2.43410065 2.42792055 2.39943734 2.39987067 2.38694719 2.36576316 2.35944146 2.33747787 2.32792199 2.29945432 2.2936847 2.28968507 2.28414747 2.26766589 2.25204667 2.24357558 2.22797482 2.21667032 2.21398813 2.1937433 2.18467079 2.1777334 2.17015843 2.1620793 2.15138264 2.15381071 2.13685093 2.11943649 2.11887029 2.1159352 2.0970927 2.08273284 2.069608 2.0607551 2.05488467 2.04676503 2.03849952 2.0305599 2.00991331 2.00363631 1.99804648 1.99191414 1.97389736 1.96125548 1.95090699 1.94053868 1.92712909 1.91961415 1.91733368 1.89476127 1.89153479 1.88038991 1.87369885 1.8659481 1.84098203 1.84205029 1.82985642 1.82269702 1.81475842 1.80846574 1.80268857]
Total Variance Explained by all components: 0.46551669359952574
Shape of the reduced data: (56318, 158)
Reduced data sample: [[ 1.33003764e+02 3.11663647e+01 1.23427211e+02 1.54590266e+01 -2.34470218e+01 -6.45674028e+00 1.28157026e+01 4.84912822e+01 2.18299591e+00 2.26882739e+00 3.27898601e+01 4.25132938e+01 2.81328090e+01 3.21981442e+01 1.59423050e+01 2.61141167e+01 9.76979423e+00 -6.71685973e+00 6.66834811e+00 1.29710257e+00 5.84879900e-01 -7.83434260e+00 1.21211368e+01 -7.41498724e+00 1.77303645e+01 -4.34884518e+00 2.03589096e+01 6.98735644e+00 1.29630213e+01 6.95527227e+00 1.56929729e+01 1.43996570e+01 1.17798540e+01 -8.79866696e+00 -7.19475495e+00 1.73040507e+01 -1.05548886e+01 8.58943357e+00 -6.24998041e+00 -8.38161847e+00 1.10175225e+00 -2.73807231e+00 -7.18014502e+00 1.90414950e+00 2.95140637e+00 5.27426273e+00 -2.67987450e+00 1.18403014e+01 6.95769024e+00 1.33179949e+01 8.33801807e-01 1.51558652e+01 6.39609035e+00 5.78670080e+00 3.47887587e+00 -2.07315511e+00 3.02084107e+00 -3.09873375e+00 3.50079517e+00 6.47645873e+00 -3.97646943e+00 -2.73104851e+00 1.71340393e+00 1.49765845e+00 -4.22931355e+00 -1.56976946e+00 -7.05056056e+00 -3.58304567e+00 -1.09152516e+01 -8.76498120e+00 -1.99662226e+00 1.72168076e+00 2.07214885e+00 -5.14283200e+00 1.42205496e+01 8.51284088e+00 -4.86904476e-01 4.82457074e+00 3.96407206e+00 -9.57015987e-01 -6.56494362e-01 -9.43038945e+00 2.41451292e+00 5.83604046e-01 -7.52679897e+00 -2.20399944e+00 6.12202783e+00 4.26695145e+00 -1.89286848e+00 -8.45210543e-01 -5.11902593e+00 3.12674681e+00 3.22946998e+00 4.00594152e+00 -4.97808999e+00 -1.69849605e+00 -1.53362970e+00 2.74787810e+00 -7.67092126e-01 3.37407294e+00 2.72821849e+00 -9.84523060e-01 -1.10741729e-01 -1.06294695e+00 2.16295409e+00 5.13316140e-01 4.44105027e+00 1.71898169e+00 -6.28694144e-01 -3.66343466e+00 -1.10917647e+00 7.17678737e+00 2.39041565e+00 6.45545535e-01 2.62954215e+00 -8.05480457e-01 3.02439191e+00 9.20392216e+00 2.63251978e+00 -4.46179373e+00 1.09809075e+00 -1.45548137e+00 3.16841930e+00 -4.81892807e+00 2.13238538e+00 1.64746639e+00 -2.24094510e+00 2.05683908e+00 6.24932919e-01 -7.11209122e-01 -3.63843634e+00 3.94341227e+00 -3.10157257e+00 2.06468272e-01 5.88251772e+00 3.23296226e+00 2.83012606e+00 4.46647936e+00 -5.10419458e+00 2.54029362e-01 5.18790480e+00 6.35902334e-01 -4.09113572e+00 9.66858224e-01 3.15968298e+00 -5.81009197e+00 -1.29024783e+00 3.52343011e+00 4.66193231e-01 4.28245966e-02 -1.96579025e+00 -7.71626021e-02 1.31810834e+00 -1.82016502e+00 1.31914721e+00 1.81382817e+00 3.17765953e+00 -2.80746558e+00] [ 1.21155080e+02 5.69579855e+01 8.59574217e+01 -8.90387654e+00 5.83568898e+00 -1.96499962e+00 -8.10543415e+00 6.02060860e+00 3.19552406e+01 1.31808316e+01 1.59283709e+01 3.10155244e+01 1.88028728e+01 2.58402603e+01 2.62850969e+00 8.23553839e+00 1.47738718e+01 1.40476736e+01 2.42958545e+00 -4.03675325e+00 1.38384229e+01 7.13664599e-01 -3.92739197e+00 8.39356370e+00 -9.63623126e+00 -1.10431549e+01 1.78366763e+00 1.60592189e+01 -1.53246083e+00 1.74068558e+00 1.96920884e+01 2.76486918e+00 1.42703599e+00 -5.11432109e-02 4.05110757e+00 7.15796455e+00 -2.98302691e+00 -8.95678823e+00 -5.25470276e+00 -7.66734230e+00 4.97688383e+00 -1.86649170e+00 5.03361805e+00 -4.58530843e+00 1.25863294e+00 2.67262711e+00 -1.46000031e+00 1.03366226e+01 -9.95227429e+00 1.04132482e+01 5.84228715e+00 1.49576749e+01 8.97025992e-01 8.98315921e+00 4.20048047e+00 -1.50265054e+00 -7.46300239e+00 6.12428787e-01 2.09406087e+00 2.07325887e-01 3.30982237e+00 -5.89504639e+00 -1.54023257e+00 8.27869202e+00 -2.64504578e+00 5.53566762e+00 -7.05988171e+00 -2.65546327e+00 9.18205240e-01 3.19347063e-02 -3.53606314e+00 -3.58748394e+00 -9.18974483e-01 -3.16776178e-01 4.40318510e+00 6.07903318e+00 5.00711157e+00 -1.86450408e+00 5.07600891e+00 -1.65801550e-01 1.67101762e+00 -6.63061996e+00 -1.19857260e+00 -2.00499166e+00 3.63109770e+00 2.89302098e+00 -1.39862909e+00 -3.00625260e+00 -2.90702995e+00 -1.46801170e-01 2.32500283e+00 4.92700133e+00 -1.17608556e+00 -1.16538023e+00 4.59755679e+00 -2.33836202e+00 3.64049732e+00 -2.48305951e+00 2.02693664e+00 -1.47494150e+00 3.30840356e+00 4.95168979e+00 -3.94657521e+00 -3.77907161e+00 9.73208196e-01 4.60866636e+00 5.70798622e+00 -2.46850890e-01 5.83508866e-01 -3.87690908e+00 -1.42124990e+00 3.91524808e+00 1.19780900e+00 1.22820677e+00 7.13767126e-01 1.52048948e+00 1.54287057e+00 5.06026023e+00 -3.76372369e-01 4.03908883e+00 -1.28239727e-01 -3.39694757e-01 -2.77110082e+00 1.24315096e+00 4.00677496e+00 -5.17504145e-01 2.94777364e+00 1.24349704e-01 1.55437348e+00 1.31621792e+00 3.11381635e+00 -8.75503801e-01 -4.42230909e-01 -2.43486389e+00 5.97130283e+00 2.79388172e-01 8.67157254e-01 2.09193378e+00 1.12591224e+00 2.30864971e+00 5.13130754e-01 2.89295412e+00 -1.33760314e+00 2.04612342e+00 -1.99991602e+00 -1.20933493e+00 -4.91094382e-03 -3.10085946e-01 6.54728839e-01 2.81555523e+00 -8.63587708e-01 -1.79929371e+00 1.48356744e+00 9.24651110e-01 -2.13109913e+00 4.51462847e+00 -1.39384632e+00 -2.51654428e+00]]
******** n_components=168 ********
Explained Variance Ratio per Component: [0.09722155 0.03288487 0.02784989 0.01907246 0.01673716 0.01475282 0.01010298 0.00927506 0.0078448 0.00695542 0.00637769 0.00568514 0.00550618 0.00521396 0.00476945 0.00443479 0.00425115 0.00404316 0.00396367 0.00368677 0.00345759 0.00327868 0.00311001 0.00305991 0.00290327 0.00271685 0.00264588 0.0025908 0.0025124 0.00242962 0.00233462 0.00227578 0.00215716 0.00214654 0.00208898 0.00202042 0.00197383 0.00191181 0.00187729 0.0018439 0.00182904 0.0017859 0.00174059 0.0016916 0.00167831 0.00163836 0.00160237 0.00156953 0.00155887 0.00153447 0.00150441 0.00147002 0.00146275 0.00144095 0.00141554 0.00140865 0.00139405 0.00137512 0.00136417 0.00135793 0.00134358 0.00131265 0.00130837 0.00129786 0.00126465 0.0012585 0.00124963 0.0012432 0.00122884 0.00122249 0.00120871 0.00119447 0.00119136 0.00117019 0.00116626 0.00115722 0.0011499 0.00113798 0.00113485 0.00112998 0.00111074 0.00110585 0.00109559 0.00108785 0.00108163 0.00106922 0.00106545 0.00106084 0.00105962 0.00105142 0.00104517 0.00103989 0.00103195 0.00102396 0.00101748 0.00101188 0.00100922 0.00099876 0.00099732 0.00098895 0.00098647 0.00098169 0.00097305 0.00097014 0.00096055 0.0009572 0.00094762 0.00094379 0.00094045 0.0009373 0.0009322 0.00092903 0.00092839 0.00091802 0.0009168 0.00090885 0.0009037 0.00090123 0.00089836 0.00089315 0.00088904 0.00088553 0.00088294 0.00087903 0.00087691 0.00087292 0.00086863 0.00086282 0.0008597 0.0008556 0.00085011 0.00084648 0.00084346 0.00084029 0.00083757 0.00083209 0.00083005 0.00082329 0.00081798 0.00081539 0.00081381 0.00080769 0.00080627 0.00080114 0.00079787 0.0007943 0.00078969 0.0007846 0.00078152 0.00078114 0.00077614 0.00077208 0.0007678 0.00076781 0.00076062 0.00075912 0.0007521 0.00074736 0.00074669 0.00074159 0.00073973 0.00073382 0.00073195 0.00072649 0.00071859 0.00071566 0.00071008 0.0007025 ]
Explained Variance per Component: [237.2527247 80.24995646 67.96295031 46.54311306 40.84420643 36.0017507 24.65460654 22.63420233 19.14389615 16.97351607 15.56368002 13.87362382 13.43690363 12.72377825 11.63902536 10.8223519 10.37420573 9.86664454 9.67265805 8.99693963 8.43765726 8.00106883 7.58945299 7.46718894 7.0849392 6.63001766 6.45683143 6.32240124 6.13108254 5.92906678 5.69725264 5.55365856 5.26419532 5.23825853 5.09779466 4.93050263 4.81680583 4.66545172 4.58120876 4.49973537 4.4634662 4.35817603 4.2476042 4.12805295 4.09563471 3.99813519 3.91032449 3.8301785 3.80416029 3.74460979 3.67126437 3.58733458 3.56959985 3.51640556 3.45438719 3.43756828 3.40194273 3.35574239 3.32903049 3.31380818 3.27878712 3.20330855 3.19285226 3.16719808 3.08615321 3.07116474 3.04951639 3.03380912 2.99878743 2.98327526 2.94965612 2.91489505 2.9073239 2.85565536 2.84606679 2.82399441 2.80614573 2.77705811 2.76940614 2.75753528 2.71056865 2.69862847 2.67359884 2.65470587 2.6395326 2.60924735 2.60005914 2.58879679 2.58582988 2.56581165 2.55054854 2.53768445 2.51830938 2.49880812 2.48298675 2.46931725 2.46283005 2.43731035 2.43380251 2.4133718 2.40731748 2.39564693 2.37456268 2.36746008 2.34407177 2.33587671 2.31250073 2.30316423 2.29500956 2.28732246 2.27488828 2.26712904 2.26558398 2.24026089 2.2372965 2.21790381 2.20531794 2.19930633 2.19229854 2.17959299 2.16954389 2.16097377 2.15467464 2.1451247 2.13994932 2.1302084 2.11974987 2.10556746 2.09794647 2.08795482 2.07455316 2.06569159 2.05831111 2.05058896 2.04394041 2.03057725 2.02560438 2.00908785 1.99615345 1.98981393 1.985957 1.97103102 1.96755826 1.95504129 1.94706733 1.93834948 1.92710528 1.91468345 1.90717849 1.90624725 1.89403252 1.88413814 1.87368877 1.8737153 1.8561651 1.85250714 1.83536229 1.8238152 1.82215993 1.80972701 1.8051833 1.79077091 1.78619898 1.77286746 1.75359955 1.74645837 1.7328366 1.7143234 ]
Total Variance Explained by all components: 0.47323643557300404
Shape of the reduced data: (56318, 168)
Reduced data sample: [[ 1.33003764e+02 3.11663647e+01 1.23427211e+02 1.54590266e+01 -2.34470218e+01 -6.45674031e+00 1.28157024e+01 4.84912814e+01 2.18300035e+00 2.26881568e+00 3.27898774e+01 4.25133171e+01 2.81328334e+01 3.21981215e+01 1.59423919e+01 2.61139113e+01 9.76991568e+00 -6.71713208e+00 6.66852910e+00 1.29680457e+00 5.85248068e-01 -7.83456712e+00 1.21212690e+01 -7.41438500e+00 1.77332748e+01 -4.35015841e+00 2.03572127e+01 6.99006504e+00 1.29644929e+01 6.95262244e+00 1.56921566e+01 1.44050895e+01 1.17774239e+01 -8.78874238e+00 -7.21246277e+00 1.73198109e+01 -1.05545975e+01 -8.57888237e+00 -6.22099774e+00 -8.38564371e+00 1.10216267e+00 -2.74514131e+00 -7.14413287e+00 1.89712010e+00 3.03918039e+00 5.28003339e+00 -2.62830964e+00 1.18656522e+01 6.81631432e+00 1.34075652e+01 8.42959587e-01 1.49210961e+01 6.72100493e+00 5.54015328e+00 3.64449822e+00 2.11765193e+00 2.96801938e+00 -2.94803631e+00 4.68967120e+00 5.67310346e+00 -4.07800135e+00 -2.63051113e+00 1.66480234e+00 1.73700932e+00 -4.41948011e+00 -2.32734500e+00 -6.13949422e+00 -3.73523373e+00 -1.15999201e+01 -7.01262050e+00 -3.99850770e+00 2.07213599e+00 3.14813282e+00 -5.95524590e+00 1.44170450e+01 -7.33361117e+00 -2.80215796e-01 4.46756669e+00 3.20046661e+00 3.91369726e+00 -4.58303453e-01 -7.37502316e+00 -2.80344520e+00 -1.66510217e+00 6.34415273e+00 -1.76737556e-02 7.87351914e+00 4.52847609e+00 -5.64690881e-01 1.97196949e+00 -5.65308815e+00 -2.86830505e+00 2.72618859e-01 -2.05378359e-01 -5.56671201e+00 2.99938172e+00 3.02371857e+00 2.58873384e+00 1.37658001e+00 -1.66861873e+00 -2.97162967e-02 3.15362558e+00 6.76658174e-01 2.02062419e+00 4.85763166e+00 -8.85210629e-01 -1.60757681e-01 -2.90101439e+00 6.14269701e+00 -3.14730783e+00 1.02965441e+00 -1.91834586e+00 -3.17766110e+00 4.92758861e+00 -1.08212641e+00 8.49865681e+00 -8.10740917e-01 7.89793120e+00 -1.77157442e+00 -6.47396916e-01 -1.23867166e+00 -3.87394219e-01 4.03872831e+00 5.63559412e-01 -2.62694945e+00 -1.09992254e+00 -1.66024345e+00 -3.01089784e-01 5.22975851e+00 1.90674634e+00 -5.54199981e+00 4.90805180e+00 -6.78627094e-01 1.40957064e+00 6.98311686e-01 5.14927765e+00 -2.28575421e+00 -3.53672516e+00 3.36727376e+00 1.11790536e+00 3.82061886e+00 -9.27336676e-02 -4.96049887e-02 4.58163571e+00 1.03943508e+00 2.64665260e+00 -1.17668456e+00 9.75755726e-01 4.46669236e-01 5.12158770e+00 1.73782202e+00 1.26961580e+00 5.49285541e+00 1.18339027e+00 1.26344232e+00 -2.06674238e+00 7.37600120e-01 1.99009532e+00 8.96243566e+00 -5.85710484e-01 -5.84685733e-02 -4.35887223e+00 3.59732821e+00 9.46606812e-01 -5.17589537e+00 2.46509291e+00 3.52817434e+00 1.62573887e+00] [ 1.21155080e+02 5.69579855e+01 8.59574217e+01 -8.90387650e+00 5.83568897e+00 -1.96499954e+00 -8.10543452e+00 6.02060866e+00 3.19552386e+01 1.31808243e+01 1.59283834e+01 3.10155026e+01 1.88029127e+01 2.58402518e+01 2.62850002e+00 8.23545861e+00 1.47738161e+01 1.40476858e+01 2.42933205e+00 -4.03646996e+00 1.38384933e+01 7.13874474e-01 -3.92804261e+00 8.39280357e+00 -9.63666834e+00 -1.10409326e+01 1.78188304e+00 1.60605032e+01 -1.52919367e+00 1.73874823e+00 1.96933455e+01 2.77714040e+00 1.41819935e+00 -4.78984861e-02 4.04204607e+00 7.15880220e+00 -2.97603574e+00 8.92545534e+00 -5.20503233e+00 -7.67342323e+00 5.00380628e+00 -1.88218167e+00 5.00335673e+00 -4.62107851e+00 1.30864548e+00 2.65535877e+00 -1.60200032e+00 1.04003485e+01 -9.86490841e+00 1.03584478e+01 5.92337901e+00 1.49904867e+01 1.20148763e+00 8.65037558e+00 4.31801168e+00 1.19149528e+00 -7.44420653e+00 5.85868364e-01 2.23710484e+00 -2.18292312e-02 3.20208226e+00 -6.11246539e+00 -1.84854089e+00 8.28114987e+00 -3.09985196e+00 4.92653011e+00 -6.58173898e+00 -2.76525386e+00 8.08288754e-01 4.24883860e-01 -4.15765497e+00 -3.06995618e+00 -8.43132862e-01 -1.13524535e+00 5.31398487e+00 -5.79898420e+00 5.04171298e+00 -1.96923037e+00 3.59109258e+00 4.13773927e+00 1.84423191e+00 -5.47525940e+00 -2.98655248e-01 9.23499311e-01 -3.81570146e+00 3.20742555e-01 -4.22229914e+00 7.67854557e-01 -3.62677730e+00 3.57362884e-01 1.88815801e+00 -5.09993825e+00 -1.31047317e+00 4.32560525e-01 5.39549378e+00 6.99519663e-01 2.09982507e+00 -2.02914718e+00 6.82731423e-01 2.20981727e+00 7.11562327e+00 -3.26933552e+00 -1.06162570e+00 -4.10341132e+00 6.20432036e+00 4.40754633e-01 -2.83324530e+00 -2.00931696e+00 5.81594534e+00 -5.20869067e-01 2.25942090e+00 -3.83319379e+00 -5.53588959e+00 -3.16623009e-01 -1.68789390e+00 -1.37348067e+00 1.11774668e+00 -1.31494329e+00 5.74541988e-01 4.11348076e-01 -1.65275131e+00 5.25039150e+00 3.74721222e+00 2.03287428e+00 -2.93048084e+00 1.49128102e-01 -9.45836644e-01 -2.99585565e+00 5.32498736e+00 -1.57531835e+00 -3.11580143e-01 3.45921344e+00 -4.84604596e+00 -1.58035136e+00 2.12196769e+00 2.17027540e+00 1.04609651e+00 1.11771942e+00 2.83769645e+00 -2.75043668e+00 1.26367835e+00 1.19803501e+00 6.34458129e-01 -1.32603796e+00 -3.11676171e+00 2.14255136e+00 1.00005183e+00 -3.02148429e+00 -1.40977710e+00 5.54866029e-01 1.01861637e+00 -1.90890998e+00 1.32015765e-01 -2.53117743e-01 3.90543191e+00 2.32306797e+00 2.40820827e+00 9.19427314e-01 5.50536841e+00 4.86856667e-01 3.68662395e+00 2.76912123e+00 -2.04189231e+00 9.97291290e-01 -2.15815085e-01 2.81639760e+00 9.15269430e-01 -1.39053588e+00]]
******** n_components=178 ********
Explained Variance Ratio per Component: [0.09722155 0.03288487 0.02784989 0.01907246 0.01673716 0.01475282 0.01010298 0.00927506 0.0078448 0.00695542 0.00637769 0.00568514 0.00550618 0.00521396 0.00476945 0.00443479 0.00425115 0.00404316 0.00396367 0.00368677 0.00345759 0.00327868 0.00311001 0.00305991 0.00290327 0.00271685 0.00264588 0.0025908 0.0025124 0.00242962 0.00233462 0.00227578 0.00215717 0.00214654 0.00208898 0.00202043 0.00197383 0.00191183 0.0018773 0.00184391 0.00182904 0.0017859 0.0017406 0.00169165 0.00167836 0.00163834 0.00160241 0.00156955 0.00155891 0.00153451 0.00150447 0.0014701 0.00146271 0.00144114 0.00141574 0.00140889 0.00139431 0.00137517 0.00136415 0.00135816 0.00134348 0.00131302 0.00130876 0.00129819 0.00126546 0.00125939 0.00125028 0.0012434 0.00122935 0.00122287 0.00120967 0.0011949 0.00119145 0.00117072 0.001167 0.0011577 0.00114946 0.00113876 0.00113628 0.00112917 0.00111108 0.00110622 0.00109779 0.00108914 0.00108261 0.00107094 0.00106793 0.00106339 0.00106126 0.00105169 0.00104643 0.0010408 0.00103404 0.00102338 0.0010204 0.00101376 0.00100582 0.00100197 0.00099811 0.00099215 0.00098813 0.00098547 0.00097539 0.00097307 0.00096597 0.00096267 0.00095121 0.00094643 0.00094211 0.00094087 0.00093931 0.00093547 0.00093199 0.00092144 0.00091302 0.0009111 0.00090554 0.00090355 0.0008993 0.0008972 0.00089438 0.00088961 0.0008871 0.00088173 0.00088131 0.00087829 0.00087239 0.00087168 0.00086502 0.00086433 0.00085888 0.00084961 0.00084905 0.00084865 0.00084358 0.00083872 0.00083586 0.00083459 0.0008278 0.00082486 0.00082201 0.00081709 0.000815 0.00080884 0.00080484 0.00079928 0.00079638 0.000796 0.00079245 0.00079087 0.00078689 0.00077847 0.00077752 0.00077381 0.00076976 0.00076746 0.00076437 0.00076224 0.00075206 0.00074972 0.00074797 0.0007457 0.00074249 0.0007354 0.00073307 0.0007291 0.00072383 0.00072137 0.00071777 0.00071247 0.00071144 0.00070749 0.0007061 0.00070132 0.00069788 0.00069331 0.00068581 0.00068438]
Explained Variance per Component: [237.2527247 80.24995646 67.96295031 46.54311306 40.84420643 36.0017507 24.65460654 22.63420233 19.14389614 16.97351606 15.56367999 13.87362382 13.43690363 12.72377823 11.63902532 10.8223519 10.37420571 9.86664461 9.67265817 8.99693944 8.43765707 8.00106852 7.58945288 7.46718884 7.08493975 6.63001982 6.45683225 6.32239806 6.13108334 5.92906892 5.69725613 5.55366082 5.26420076 5.23826218 5.09779937 4.93051541 4.81678845 4.6654856 4.5812231 4.49975837 4.46347351 4.35818926 4.24764828 4.12818481 4.09576215 3.99808179 3.91041094 3.83021147 3.80424499 3.74471481 3.67140123 3.58753115 3.56950226 3.51684778 3.45487104 3.43816766 3.40256956 3.35587502 3.3289706 3.31436722 3.27854541 3.20420018 3.19380562 3.16802061 3.08813678 3.07332748 3.05108954 3.03431179 3.00002599 2.98420815 2.95199529 2.91596109 2.90753347 2.85695113 2.84786127 2.82518259 2.80506024 2.77895105 2.77289608 2.75554915 2.71141312 2.69954989 2.67895892 2.6578651 2.64192222 2.61345521 2.60609843 2.59502119 2.589818 2.5664728 2.55363655 2.53989174 2.52339773 2.49737662 2.49010906 2.47390134 2.45453047 2.44513477 2.43572019 2.42117517 2.41135788 2.40486388 2.3802765 2.37460988 2.35728667 2.34923144 2.32127225 2.30961047 2.29905581 2.29604429 2.29222104 2.2828661 2.27435177 2.24861925 2.22807965 2.22339476 2.20980691 2.2049581 2.19458225 2.18945762 2.18259262 2.1709492 2.16481361 2.1517137 2.15067804 2.14332923 2.12891838 2.1271852 2.11093039 2.10924408 2.09595211 2.07332078 2.0719669 2.07099497 2.05860202 2.04674293 2.03976456 2.03666923 2.02010204 2.01294224 2.00597994 1.99396745 1.98885896 1.97384597 1.96406401 1.95050155 1.94342839 1.94250105 1.93384423 1.92997893 1.92027931 1.89973168 1.89740539 1.88835295 1.87847213 1.87284461 1.86531681 1.8601164 1.83527084 1.82956057 1.82530611 1.8197568 1.81192373 1.79461338 1.78893323 1.77924821 1.76637792 1.76038513 1.75158531 1.73867384 1.73614087 1.72651047 1.72312243 1.71145151 1.70304775 1.69190592 1.67361003 1.67011953]
Total Variance Explained by all components: 0.4807552975592656
Shape of the reduced data: (56318, 178)
Reduced data sample: [[ 1.33003764e+02 3.11663647e+01 1.23427211e+02 1.54590266e+01 -2.34470218e+01 -6.45674030e+00 1.28157029e+01 4.84912830e+01 2.18299697e+00 2.26882083e+00 3.27898849e+01 4.25133091e+01 2.81328312e+01 3.21981466e+01 1.59423581e+01 2.61139529e+01 9.76985420e+00 -6.71727626e+00 6.66849647e+00 1.29677271e+00 5.85295966e-01 -7.83435835e+00 1.21213665e+01 -7.41389341e+00 1.77335571e+01 -4.35159952e+00 2.03580662e+01 6.99088168e+00 1.29650472e+01 6.95645453e+00 1.56957261e+01 1.44081964e+01 1.17746407e+01 -8.79339780e+00 -7.21103110e+00 1.73335236e+01 -1.05395787e+01 8.59173486e+00 -6.24557163e+00 -8.39837884e+00 1.11053393e+00 -2.74176700e+00 -7.15861275e+00 1.93681670e+00 3.02852368e+00 5.34950637e+00 -2.66109886e+00 1.18322673e+01 6.86254816e+00 1.34178572e+01 8.36747033e-01 1.49025907e+01 6.76411060e+00 5.62704386e+00 3.68263421e+00 -1.92380068e+00 3.10573613e+00 -3.00924578e+00 4.40191447e+00 5.82441364e+00 -4.14278549e+00 -2.92107978e+00 1.60808499e+00 1.79763983e+00 -4.26198107e+00 -2.08837759e+00 -6.56573256e+00 -3.41616881e+00 -1.10107833e+01 -7.84151389e+00 -3.55760395e+00 2.00181178e+00 2.70164215e+00 -7.83581603e+00 1.27232389e+01 7.67452911e+00 -6.39842795e-01 4.51896386e+00 3.78142912e+00 3.11429675e+00 -1.15737284e+00 8.57861994e+00 -3.01190096e+00 9.99106111e-01 -7.74327745e+00 -1.78664328e+00 -6.76462999e+00 5.47476938e+00 -4.44429130e-01 1.92823432e+00 -4.49223703e+00 1.39092168e+00 5.49433005e-01 -2.40151506e-01 -5.30120098e+00 2.92249917e+00 -7.69047771e-01 -8.52142003e-01 -1.56746718e+00 1.98711616e+00 -2.61909589e+00 -2.22400140e+00 -1.33857808e+00 -6.22542678e-03 -3.90291527e+00 7.47664497e-01 2.93527111e+00 8.79191551e-01 -4.08724175e-01 -3.30788580e+00 -5.85228194e+00 -5.42031194e+00 2.73561337e+00 6.56152183e+00 4.32410065e+00 4.94095527e+00 1.07896327e+00 -2.64253192e+00 3.99056889e+00 7.21497606e+00 -5.44200966e+00 5.71796081e+00 -1.71746569e-01 5.29965752e-01 2.87063719e+00 -8.07046723e-02 5.99703772e-01 1.09754789e-01 1.44179480e+00 -9.80225525e-01 2.86701748e-01 -2.96768006e+00 8.13238679e+00 -6.31949763e+00 -2.84370398e+00 1.26609595e+00 1.20646170e+00 1.26331120e+00 2.89286832e+00 -9.82652059e-01 -6.70208306e+00 -4.72684840e-01 -1.31973028e+00 -1.20144863e+00 2.92667717e+00 9.36744117e-01 -2.12606479e+00 -2.40996572e+00 1.24667783e+00 -1.20576403e+00 -9.55256368e-01 7.13186045e+00 -9.72205409e-01 -2.06621437e+00 -2.18737290e+00 -3.49451490e+00 7.29649055e-01 -7.73059023e-01 5.78932030e-01 4.15239788e+00 -1.33720352e+00 -3.03076129e+00 -1.71950081e+00 1.75995747e+00 -2.95296000e-01 -2.77795302e+00 1.26109901e+00 2.07675406e-01 1.57580633e+00 -1.75349389e+00 -4.62736580e+00 5.73661340e+00 2.96268063e+00 -3.27732831e+00 2.56250034e+00 2.14561621e+00 -2.61151174e+00 -6.08572270e-01] [ 1.21155080e+02 5.69579855e+01 8.59574217e+01 -8.90387651e+00 5.83568898e+00 -1.96499959e+00 -8.10543453e+00 6.02061011e+00 3.19552371e+01 1.31808326e+01 1.59283824e+01 3.10155135e+01 1.88029050e+01 2.58402254e+01 2.62846097e+00 8.23548510e+00 1.47737847e+01 1.40476017e+01 2.42923232e+00 -4.03656335e+00 1.38386721e+01 7.13902306e-01 -3.92755647e+00 8.39301353e+00 -9.63542381e+00 -1.10439850e+01 1.78389262e+00 1.60612221e+01 -1.53324406e+00 1.73737575e+00 1.96921938e+01 2.77483704e+00 1.41917055e+00 -4.83603952e-02 4.05003175e+00 7.17215553e+00 -2.97693900e+00 -8.94181056e+00 -5.19293265e+00 -7.68763694e+00 4.97857998e+00 -1.85548765e+00 5.00500470e+00 -4.61831158e+00 1.31903466e+00 2.68646670e+00 -1.57456394e+00 1.04488514e+01 -9.90772592e+00 1.03557404e+01 5.87850645e+00 1.49123623e+01 1.19007784e+00 8.94403741e+00 4.19374031e+00 -1.47281323e+00 -7.49359214e+00 6.41472504e-01 2.18277470e+00 1.57454629e-01 3.51168924e+00 -6.06233317e+00 -2.00189914e+00 8.28838860e+00 -2.71355688e+00 5.21837457e+00 -6.47541958e+00 -2.27247955e+00 1.01326696e+00 3.95110125e-01 -4.30874618e+00 -3.16478514e+00 -8.38894527e-01 -1.41765521e+00 4.19366447e+00 6.15028383e+00 4.63309103e+00 -2.13841451e+00 2.72422151e+00 4.54916315e+00 5.63565351e-01 6.18442558e+00 7.27784208e-01 -3.14383654e-01 5.08556067e+00 1.12868479e+00 2.39199838e+00 5.58534351e-01 5.11516993e+00 1.02494830e+00 1.55508295e+00 5.52604693e+00 9.23500908e-02 2.36221386e-01 5.70621636e+00 -2.00692296e-01 -2.98040429e+00 1.16721049e+00 -1.56071600e+00 5.74771709e+00 1.41091275e+00 7.16724826e-01 5.28027630e+00 -3.41018212e+00 -3.27346390e+00 -1.97805895e+00 2.44366813e+00 1.97679540e+00 1.80385749e+00 -1.25128898e+00 -3.48138792e+00 -6.25692735e+00 2.83194490e+00 2.51975423e+00 -2.41690303e+00 3.83541798e-01 -4.01842026e+00 1.76388662e+00 3.60222746e+00 -7.34871438e-01 2.71583740e+00 -8.86080028e-01 -7.94560697e-01 8.79735426e-01 2.40717961e+00 -1.26242305e+00 -3.57513629e+00 -5.55069601e+00 -1.97419920e+00 2.67322843e+00 -8.27881930e-02 2.30332546e-01 1.80518895e+00 -2.10905117e+00 1.85536849e-01 2.45663609e+00 -1.22144710e+00 4.64751538e-01 2.03729594e+00 -2.17950011e+00 -4.77308680e+00 -2.25266721e-01 3.73902531e+00 2.92226998e+00 3.05456387e+00 -4.48394919e+00 -2.99723075e+00 -2.47498440e+00 -6.02011337e-01 8.62787221e-01 9.71411697e-02 4.39636718e+00 -2.11523748e+00 -9.99898153e-01 -1.91859100e+00 -3.44071689e+00 9.56455624e-01 -1.88243675e+00 -2.76329493e+00 2.84020899e+00 8.00228439e-01 -4.25339322e+00 -1.46393735e+00 -1.82632755e-02 -3.12378052e-01 -1.00963842e+00 -2.50109015e+00 -1.21721078e+00 -2.98044371e+00 1.94372093e+00 -2.86457335e+00 -2.29917997e+00 4.59311524e+00 1.55324103e+00 8.76495085e-01 1.90269450e+00 -6.44956177e-01 4.49386935e+00]]
******** n_components=188 ********
Explained Variance Ratio per Component: [0.09722155 0.03288487 0.02784989 0.01907246 0.01673716 0.01475282 0.01010298 0.00927506 0.0078448 0.00695542 0.00637769 0.00568514 0.00550618 0.00521396 0.00476945 0.00443479 0.00425115 0.00404316 0.00396367 0.00368677 0.00345759 0.00327868 0.00311001 0.00305991 0.00290327 0.00271685 0.00264588 0.0025908 0.0025124 0.00242962 0.00233463 0.00227578 0.00215717 0.00214654 0.00208898 0.00202043 0.00197383 0.00191183 0.0018773 0.00184392 0.00182905 0.00178592 0.0017406 0.00169164 0.00167837 0.00163836 0.00160241 0.00156963 0.0015589 0.0015345 0.00150448 0.00147017 0.00146273 0.00144113 0.00141578 0.00140899 0.00139436 0.00137523 0.00136433 0.00135836 0.0013436 0.00131308 0.00130872 0.00129848 0.00126549 0.00125965 0.00125008 0.00124364 0.00122906 0.00122291 0.00120923 0.00119542 0.00119189 0.00117106 0.00116751 0.00115855 0.00115057 0.00113876 0.0011363 0.00113117 0.00111109 0.00110695 0.00109921 0.00109007 0.00108305 0.00107215 0.0010683 0.00106346 0.00106181 0.00105347 0.0010472 0.00104091 0.00103497 0.00102463 0.00102081 0.00101541 0.00100915 0.00100275 0.00100104 0.00099193 0.00098976 0.00098832 0.00097598 0.000973 0.00096775 0.00096378 0.00095353 0.00095029 0.0009446 0.00094247 0.0009389 0.00093607 0.00093347 0.00092227 0.00091895 0.00091571 0.00090746 0.00090593 0.00090279 0.00089894 0.00089609 0.00089434 0.00089089 0.00088827 0.00088398 0.00088178 0.00087955 0.0008736 0.00086752 0.00086679 0.00085868 0.00085618 0.00085182 0.00084955 0.00084268 0.00084268 0.00083869 0.00083281 0.00083114 0.00082908 0.00082421 0.00082105 0.00081967 0.00081213 0.00081019 0.00080573 0.00080361 0.00079654 0.00079485 0.00079345 0.00078826 0.00078629 0.00078224 0.00077643 0.00077375 0.0007697 0.00076974 0.00076613 0.00076367 0.00076301 0.00075729 0.00075441 0.0007492 0.00074703 0.00074242 0.00074222 0.0007392 0.00073424 0.00073199 0.00072851 0.00072234 0.00071938 0.00071296 0.00070951 0.00070862 0.00070571 0.0007013 0.00069692 0.00069384 0.00069165 0.0006902 0.00068479 0.00068194 0.00067637 0.0006731 0.00067142 0.0006692 0.00066773]
Explained Variance per Component: [237.2527247 80.24995646 67.96295031 46.54311306 40.84420643 36.0017507 24.65460654 22.63420233 19.14389615 16.97351607 15.56368001 13.87362376 13.43690363 12.72377825 11.63902525 10.82235195 10.37420573 9.86664449 9.67265811 8.99693939 8.43765705 8.00106851 7.58945303 7.46718932 7.08494057 6.63001863 6.45683249 6.32240039 6.13108303 5.92906949 5.6972585 5.55366436 5.26420195 5.23826193 5.09780159 4.9305184 4.81679755 4.66548517 4.58123938 4.49976269 4.46348349 4.35822896 4.24764646 4.12816117 4.09578567 3.99812894 3.91041361 3.83040406 3.80422222 3.74467738 3.67142819 3.5876998 3.56955072 3.51682181 3.45497778 3.43839394 3.40270706 3.35600901 3.32941851 3.31483993 3.27883194 3.20434956 3.19370768 3.16872114 3.08820436 3.07395634 3.05061684 3.03488522 2.99931566 2.98429618 2.95091428 2.91722704 2.90860451 2.85778111 2.84909869 2.82723665 2.80776889 2.77895539 2.77293639 2.76041949 2.71141901 2.70133342 2.68244764 2.66013096 2.64299873 2.61640035 2.60700516 2.59520336 2.59115846 2.570804 2.55552044 2.54017567 2.52566454 2.50043368 2.49110704 2.47792967 2.46266956 2.44704122 2.44285782 2.42063107 2.41534519 2.41181614 2.38172182 2.37444052 2.36163798 2.35195138 2.32693405 2.3190194 2.30514747 2.29994837 2.29123243 2.28431998 2.27798751 2.25063713 2.24255034 2.23464616 2.21450856 2.21077993 2.2031182 2.19371771 2.18676244 2.18247676 2.17405913 2.16768196 2.15719648 2.15182915 2.14640038 2.13186587 2.11703587 2.11525989 2.09546764 2.08936694 2.07873184 2.07318507 2.05640877 2.05642849 2.04668794 2.03232531 2.02824775 2.02323802 2.0113378 2.00364449 2.0002602 1.98185478 1.97713112 1.9662459 1.96107324 1.94380912 1.93969036 1.9362749 1.92361983 1.91881653 1.90892406 1.89474064 1.88820478 1.87832529 1.87842695 1.86960324 1.86359976 1.86199426 1.84803967 1.84102087 1.82829021 1.82299481 1.81174537 1.81126928 1.80389075 1.79179486 1.78628989 1.77779971 1.76275907 1.75551731 1.73985728 1.73143698 1.72927306 1.72215649 1.71140869 1.70071111 1.69320905 1.68784744 1.68431866 1.67111528 1.66416706 1.65056411 1.64258492 1.63849762 1.63306723 1.62947105]
Total Variance Explained by all components: 0.48798305346769244
Shape of the reduced data: (56318, 188)
Reduced data sample: [[ 1.33003764e+02 3.11663647e+01 1.23427211e+02 1.54590266e+01 -2.34470218e+01 -6.45674026e+00 1.28157025e+01 4.84912827e+01 2.18299731e+00 2.26881728e+00 3.27898818e+01 4.25133083e+01 2.81328277e+01 3.21981402e+01 1.59423799e+01 2.61139339e+01 9.76995408e+00 -6.71708840e+00 6.66858948e+00 1.29681255e+00 5.85277485e-01 -7.83504931e+00 1.21205395e+01 -7.41431832e+00 1.77301926e+01 -4.35138155e+00 2.03581621e+01 6.99028594e+00 1.29669635e+01 6.95469663e+00 1.56956525e+01 1.44027222e+01 1.17796211e+01 -8.79044422e+00 -7.21229262e+00 1.73263360e+01 -1.05525176e+01 8.57240843e+00 -6.22157474e+00 -8.40182503e+00 1.10069462e+00 -2.72230897e+00 -7.17087967e+00 1.91132424e+00 3.01190769e+00 5.29190461e+00 -2.69247138e+00 1.18553384e+01 6.84586599e+00 1.33998684e+01 8.24361265e-01 1.50689870e+01 6.48062101e+00 5.75788469e+00 3.62324160e+00 -1.96688553e+00 3.04406314e+00 -3.00834851e+00 4.18541057e+00 5.93516948e+00 -4.11905897e+00 -2.68172025e+00 1.70396874e+00 1.85531836e+00 -4.24597632e+00 -1.94145670e+00 -6.82945251e+00 -3.81875984e+00 -1.08607670e+01 -7.87689570e+00 -3.35156051e+00 1.95776352e+00 2.80509010e+00 -4.95498370e+00 1.43321325e+01 7.84936676e+00 -9.36635156e-01 4.21344570e+00 4.54872666e+00 3.01653626e+00 6.57287433e-01 -7.75655002e+00 -2.98955189e+00 -1.42409035e-01 -8.89137123e+00 -1.79749620e+00 6.88282421e+00 3.69535362e+00 -4.50459435e-02 8.47556453e-01 -4.55645551e+00 -9.14983020e-01 1.40410794e+00 8.43633957e-01 -5.27415972e+00 2.04320350e+00 -1.35185039e+00 -2.69406857e+00 -1.80119649e+00 1.94801444e+00 3.52863132e+00 2.78003550e+00 -7.99854923e-01 4.85059778e-01 4.05469612e+00 1.55974923e+00 -4.27212714e+00 2.26338356e+00 -1.01065191e+00 -6.89002478e+00 5.03986543e-01 -1.04412332e+00 6.96015600e+00 -5.81557083e-01 6.61319916e+00 -1.64044130e+00 1.74518254e+00 8.72412171e+00 4.41820239e+00 1.75818809e+00 2.97617751e+00 5.02377503e-02 -1.00125576e+00 1.75757311e-02 1.65916562e-01 -8.98132246e-01 -6.98070464e-01 -2.81040531e-01 -3.25674327e+00 4.94641689e+00 -3.83722092e+00 8.04383255e-01 7.75186892e+00 -2.74366694e+00 1.87469914e+00 -5.10564502e+00 5.77292227e+00 3.15572483e+00 -4.08006393e-01 4.57189529e+00 4.89145986e-01 -7.11559470e-01 1.51886988e+00 1.40906816e+00 -4.00160608e+00 3.85771381e+00 -4.88639052e+00 9.60873276e-01 3.97479266e+00 2.28193243e+00 -1.29241014e+00 1.14359243e+00 1.03416595e+00 1.38274460e+00 2.63533271e+00 -3.00754668e+00 -4.13292011e+00 -5.66336072e+00 -8.39065199e-01 -1.56182892e+00 2.90286032e+00 1.19622480e+00 -1.15234104e+00 -2.44937557e+00 -1.59904386e+00 1.04186902e+00 -3.97340001e+00 -1.87423974e+00 -1.89227449e+00 -3.28510926e+00 2.87474113e+00 1.23509838e+00 -3.49187102e-01 -1.91449533e+00 1.29742830e-01 -2.46654255e+00 6.43030967e-01 -2.06323044e+00 -1.71924683e+00 2.83969564e-01 -2.23103984e+00 5.58199173e+00 1.64155105e+00 -8.16787804e-01 4.38849063e+00 3.14677606e-01 -1.48689138e+00 -3.42603489e+00] [ 1.21155080e+02 5.69579855e+01 8.59574217e+01 -8.90387651e+00 5.83568898e+00 -1.96499959e+00 -8.10543473e+00 6.02060945e+00 3.19552383e+01 1.31808267e+01 1.59283872e+01 3.10155045e+01 1.88029114e+01 2.58402412e+01 2.62846429e+00 8.23554287e+00 1.47738503e+01 1.40477381e+01 2.42934937e+00 -4.03664836e+00 1.38382999e+01 7.13877917e-01 -3.92820412e+00 8.39284263e+00 -9.63763171e+00 -1.10431324e+01 1.78171252e+00 1.60610597e+01 -1.52672404e+00 1.73672081e+00 1.96903059e+01 2.77487188e+00 1.41905611e+00 -5.19760934e-02 4.04890543e+00 7.14467805e+00 -2.98074079e+00 -8.94227827e+00 -5.20874767e+00 -7.67523608e+00 4.98566611e+00 -1.86826687e+00 4.99913245e+00 -4.65199365e+00 1.31110006e+00 2.67216873e+00 -1.55161967e+00 1.04429863e+01 -9.93298724e+00 1.03082763e+01 5.91865752e+00 1.49740258e+01 9.95278742e-01 8.99334464e+00 4.19976409e+00 -1.39043616e+00 -7.62318030e+00 5.13710711e-01 2.31417924e+00 2.85738047e-01 3.37324619e+00 -6.25410390e+00 -1.85139380e+00 8.24068503e+00 -2.75475104e+00 5.23516830e+00 -6.53635262e+00 -2.13038875e+00 1.16081106e+00 3.19614948e-01 -4.36130044e+00 -3.01394958e+00 -1.03645743e+00 -4.67432708e-01 4.61742903e+00 6.21615678e+00 4.18148357e+00 -2.08969515e+00 2.55932796e+00 4.24143279e+00 1.57978155e+00 -6.51612972e+00 -3.27175641e-01 -2.20328263e-01 3.83603682e+00 3.34255041e-01 -2.65650828e+00 -1.05889624e-01 4.67249961e+00 8.07852963e-01 2.50243549e+00 -5.62039479e+00 2.07701155e-01 1.19823448e+00 3.73594965e+00 6.63240996e-01 -3.03741698e+00 1.33317403e+00 1.70202060e+00 5.17846719e+00 -2.11587677e+00 -1.77385877e+00 5.34176419e+00 -5.09703928e+00 1.69263275e-01 6.79141562e-01 -4.26369875e+00 1.00108749e+00 -2.91277788e+00 -3.19848684e+00 1.10562706e+00 -7.50393072e-01 6.70005525e+00 8.18454255e-03 1.52988990e-01 2.62790949e+00 1.90770987e+00 -6.37523675e-02 2.81479885e-01 -8.37500769e-01 1.60293108e+00 -4.66123625e+00 3.19995128e+00 5.30864226e+00 2.41395143e+00 -3.16024632e+00 -3.91274675e+00 -3.81357886e+00 3.84271081e-01 -1.63354533e+00 -1.18453102e+00 2.09282896e+00 2.49056077e+00 4.47667504e-01 3.28014591e+00 -3.29444046e+00 1.76214068e+00 4.25761743e+00 -1.72615858e+00 2.67026310e+00 2.33712918e+00 -9.76185222e-01 1.63516793e+00 9.43460167e-01 -4.27234940e+00 5.59979869e-01 -6.40559572e-01 -2.26731426e+00 3.09010170e+00 1.10817458e-01 3.02546352e+00 -2.69010541e+00 1.18956839e-02 1.50615231e+00 -3.57955233e+00 -2.33899913e+00 6.06664678e-02 -1.72162165e+00 -4.67493712e-01 -3.45054699e+00 -3.61867524e+00 -1.22492333e+00 -1.03360987e+00 -2.59299402e+00 -2.28242777e-01 7.57897135e-01 2.36115830e-01 8.88554596e-01 1.49431708e+00 1.73063072e+00 1.68202970e+00 -3.24536224e+00 4.73347324e-01 -2.96857205e+00 -2.91063611e-01 -2.51834232e+00 -2.95455494e-01 -5.63419664e+00 -9.77462612e-01 9.81478938e-01 -7.74810536e-01 1.84996437e+00 -2.39022967e+00 4.62417313e+00 4.98220031e+00 4.55247266e-01 -1.15150046e+00 1.98267304e+00]]
******** n_components=198 ********
Explained Variance Ratio per Component: [0.09722155 0.03288487 0.02784989 0.01907246 0.01673716 0.01475282 0.01010298 0.00927506 0.0078448 0.00695542 0.00637769 0.00568514 0.00550618 0.00521396 0.00476945 0.00443479 0.00425115 0.00404316 0.00396367 0.00368677 0.00345759 0.00327868 0.00311001 0.00305991 0.00290327 0.00271685 0.00264588 0.0025908 0.0025124 0.00242962 0.00233463 0.00227578 0.00215717 0.00214654 0.00208898 0.00202043 0.00197383 0.00191182 0.00187731 0.00184392 0.00182905 0.00178591 0.0017406 0.00169165 0.00167838 0.00163837 0.00160243 0.00156963 0.00155897 0.00153453 0.00150449 0.00147019 0.00146279 0.00144121 0.00141583 0.00140901 0.00139447 0.00137534 0.00136442 0.00135851 0.00134386 0.00131314 0.00130881 0.00129846 0.00126573 0.00125975 0.00125025 0.00124401 0.00122939 0.00122301 0.0012099 0.00119539 0.00119227 0.00117144 0.00116766 0.00115869 0.00115021 0.00113918 0.00113639 0.00113106 0.00111203 0.00110684 0.0010987 0.00108973 0.00108428 0.00107245 0.0010692 0.00106316 0.00106246 0.00105371 0.00104742 0.00104195 0.00103446 0.00102579 0.00102183 0.00101644 0.00100858 0.00100323 0.00100283 0.00099327 0.00099108 0.00098745 0.00097816 0.00097671 0.00096935 0.00096531 0.00095311 0.00094949 0.00094283 0.00094194 0.00094129 0.00093666 0.00093637 0.00092627 0.0009187 0.00091321 0.00090719 0.00090431 0.00090401 0.00090137 0.00090047 0.0008958 0.00089322 0.00088859 0.00088494 0.00088421 0.00087886 0.00087619 0.00087036 0.00086721 0.00086421 0.00086141 0.00085406 0.00085136 0.00084639 0.00084423 0.00084118 0.0008403 0.00083451 0.00083143 0.00082723 0.00082511 0.00081791 0.00081579 0.00081117 0.00080899 0.00080764 0.00080532 0.00080059 0.00079807 0.00079494 0.00078953 0.00078843 0.00078565 0.00078255 0.00077795 0.00077567 0.00076941 0.00076802 0.00076553 0.00076226 0.00076031 0.00075741 0.00075096 0.00074764 0.0007448 0.00074372 0.00074139 0.00073317 0.00073086 0.00072723 0.00072625 0.00072431 0.00071978 0.0007213 0.00071225 0.00071003 0.00070806 0.00070464 0.00070062 0.0006995 0.00069752 0.00069496 0.00069027 0.0006877 0.00068424 0.00068241 0.00067556 0.00067437 0.00066744 0.00066618 0.00065995 0.000658 0.00065906 0.00065245 0.00064699 0.00064369 0.00063914]
Explained Variance per Component: [237.2527247 80.24995646 67.96295031 46.54311306 40.84420643 36.0017507 24.65460654 22.63420233 19.14389614 16.97351606 15.56368001 13.87362375 13.43690363 12.72377827 11.63902527 10.82235192 10.3742057 9.86664448 9.6726581 8.99693936 8.43765709 8.00106839 7.58945317 7.46718912 7.08494025 6.63001863 6.45683277 6.32240136 6.1310837 5.92907001 5.69725979 5.55366442 5.26420274 5.23826307 5.09780399 4.93052158 4.81680857 4.66548454 4.58124897 4.49977018 4.46347932 4.35821954 4.2476418 4.12817431 4.09579235 3.99816019 3.91046176 3.83042208 3.8043967 3.74477106 3.67145369 3.58775545 3.56969757 3.51702276 3.45508548 3.43845396 3.40297942 3.35627501 3.32964411 3.31522071 3.27946756 3.20449446 3.19392156 3.16867262 3.0888094 3.07420538 3.05103237 3.03580554 3.00012988 2.98455782 2.95255577 2.91713772 2.90953714 2.85869972 2.84948415 2.82758206 2.8068878 2.77998236 2.77316697 2.76015569 2.713716 2.70104963 2.68120208 2.65931238 2.64600231 2.61712427 2.60921261 2.59445955 2.59274489 2.57138883 2.55605048 2.5427104 2.52441477 2.50327694 2.49360877 2.48044227 2.46126161 2.44821943 2.44724041 2.4239018 2.41856581 2.40970421 2.38703403 2.38348777 2.36552293 2.35566834 2.32590654 2.31706072 2.30081572 2.29864603 2.29705235 2.28576169 2.2850566 2.26039756 2.24192259 2.22852906 2.21383937 2.20681 2.20607123 2.19962878 2.19744002 2.18604575 2.17974062 2.16844816 2.15954301 2.15776663 2.14470953 2.13820386 2.12397294 2.11627209 2.10896418 2.10212218 2.08418364 2.07759832 2.0654635 2.06020708 2.05274708 2.05062101 2.03648072 2.02896478 2.01870922 2.01354498 1.99597877 1.99080289 1.9795314 1.97420299 1.97089665 1.96525136 1.95369833 1.94754904 1.93990608 1.92670206 1.92404001 1.91725215 1.90968661 1.89844724 1.89288581 1.87761156 1.87422341 1.86815794 1.86015712 1.85541475 1.84832861 1.83259344 1.82447725 1.81755394 1.8149258 1.80922783 1.78917733 1.78354411 1.774693 1.77228213 1.76756255 1.75649299 1.76020767 1.73812433 1.73271359 1.7278987 1.71956329 1.70975044 1.70701986 1.70217242 1.69592141 1.68449262 1.67822204 1.66976659 1.66530836 1.64857799 1.64569748 1.62876239 1.62568944 1.61048551 1.60572965 1.60831605 1.59220384 1.57888093 1.57081966 1.55972339]
Total Variance Explained by all components: 0.49496274517137284
Shape of the reduced data: (56318, 198)
Reduced data sample: [[ 1.33003764e+02 3.11663647e+01 1.23427211e+02 1.54590266e+01 -2.34470218e+01 -6.45674031e+00 1.28157024e+01 4.84912826e+01 2.18299819e+00 2.26881562e+00 3.27898757e+01 4.25133090e+01 2.81328521e+01 3.21981327e+01 1.59423821e+01 2.61139946e+01 9.76989935e+00 -6.71718769e+00 6.66851906e+00 1.29683160e+00 5.85032803e-01 -7.83430715e+00 1.21220498e+01 -7.41439205e+00 1.77307936e+01 -4.35172720e+00 2.03576145e+01 6.99062273e+00 1.29678666e+01 6.95318993e+00 1.56952953e+01 1.44028792e+01 1.17817869e+01 -8.79152491e+00 -7.21080199e+00 1.73206003e+01 -1.05618729e+01 8.58450377e+00 -6.22424367e+00 -8.41590150e+00 1.12727559e+00 -2.73803447e+00 -7.16715580e+00 1.92520407e+00 3.01766832e+00 5.31936464e+00 -2.67366217e+00 1.18902400e+01 6.85360471e+00 1.34467359e+01 8.03105616e-01 1.49346740e+01 6.85124620e+00 5.68741735e+00 3.62898688e+00 1.97359449e+00 3.06821969e+00 -2.98019613e+00 4.30539047e+00 5.96893164e+00 -4.11412308e+00 -2.69755548e+00 1.63963520e+00 1.77479923e+00 -4.15600226e+00 -1.62543483e+00 -6.76175644e+00 3.91454196e+00 -1.13654055e+01 -7.21550376e+00 -3.65575286e+00 2.07576430e+00 2.66680871e+00 -6.88648276e+00 1.37228372e+01 8.21763044e+00 -1.61369239e+00 3.46880395e+00 4.67068137e+00 3.55463030e+00 -5.73457229e-01 -8.75845435e+00 -3.13020747e+00 -2.62118480e-01 6.85537546e+00 -2.86643635e+00 -5.54359493e+00 -2.64417737e+00 4.77230337e+00 1.15549513e+00 -4.57989224e+00 -2.33722716e+00 1.58197265e+00 -2.80645284e+00 -5.61376848e+00 2.02148231e+00 2.47699202e+00 -1.65547054e+00 -1.34885799e+00 -7.16750639e-01 -3.29125682e+00 1.62035331e+00 -1.14409051e+00 -4.65258461e-01 2.43257488e+00 -3.36835426e+00 2.63955280e-01 4.80486060e+00 -3.73564056e+00 3.88681254e+00 4.22166065e+00 1.58145085e+00 -3.31797793e+00 6.06384390e+00 5.18980937e+00 9.84192076e-01 1.31859608e+00 3.42333767e+00 9.35413173e+00 -3.03289210e+00 3.05730131e+00 3.10303596e+00 -2.24844770e+00 2.94956920e+00 8.78409208e-02 1.42109552e+00 -5.46159959e-02 2.63905946e+00 -3.67957637e-01 4.25875366e+00 7.71253452e+00 3.00549106e+00 3.74132554e+00 -3.71675286e+00 3.54675586e+00 2.35743069e+00 5.36491087e+00 -4.37445235e-01 4.87482337e+00 -1.22141531e-01 -3.35360948e+00 -1.90298690e+00 2.54585333e+00 2.46983270e+00 -6.57665666e-01 -1.52038245e+00 7.35382727e-01 1.32045245e+00 7.90326649e-01 2.76886965e+00 2.44477815e-01 1.72609421e+00 -5.62987100e+00 -2.99457806e+00 8.49711192e-01 4.86790606e+00 1.68797841e-01 -8.20519390e-03 1.99349115e+00 -2.92590205e+00 -7.14365168e-01 -5.26325498e-01 -3.52705425e+00 -1.08885643e+00 -1.18255783e-02 3.65849663e+00 2.77802304e-01 -8.58464333e-01 -1.49942281e+00 3.82896800e+00 5.17965041e+00 1.32170857e+00 -1.97310615e+00 8.01862371e-03 -1.38576226e+00 1.58313932e+00 3.83741845e+00 -2.77012047e+00 -2.35421300e-01 4.73849475e+00 -3.93227708e+00 3.05269925e+00 -3.51740914e+00 3.32998092e+00 2.07842675e+00 -7.34792020e-01 7.94175280e+00 -4.13599467e+00 -6.62064496e+00 -1.20332256e+00 4.57958624e+00 2.01789086e+00 -4.47703297e-01 2.67588360e+00 4.93961973e-02 -2.32264834e+00 -6.63266603e-01 4.13154331e+00] [ 1.21155080e+02 5.69579855e+01 8.59574217e+01 -8.90387652e+00 5.83568898e+00 -1.96499955e+00 -8.10543465e+00 6.02060886e+00 3.19552388e+01 1.31808312e+01 1.59283732e+01 3.10155067e+01 1.88029056e+01 2.58402519e+01 2.62847122e+00 8.23556275e+00 1.47737876e+01 1.40477833e+01 2.42924438e+00 -4.03674831e+00 1.38384474e+01 7.14474752e-01 -3.92783447e+00 8.39347765e+00 -9.63593353e+00 -1.10430570e+01 1.78361091e+00 1.60588782e+01 -1.53027879e+00 1.73605477e+00 1.96944695e+01 2.77637684e+00 1.41851684e+00 -4.64049542e-02 4.04856262e+00 7.15409220e+00 -2.98168674e+00 -8.93655696e+00 -5.20823364e+00 -7.67340071e+00 4.98222129e+00 -1.86035649e+00 5.01396031e+00 -4.63354011e+00 1.31371340e+00 2.67238394e+00 -1.54899265e+00 1.04033860e+01 -9.94209636e+00 1.03475046e+01 5.86855860e+00 1.49403215e+01 1.34461547e+00 8.96657840e+00 4.28153711e+00 1.42077011e+00 -7.51681588e+00 5.66453231e-01 2.31862008e+00 1.00192222e-01 3.33168394e+00 -6.11252384e+00 -1.86576218e+00 8.46616961e+00 -2.60423026e+00 5.47291139e+00 -6.30173379e+00 2.38105986e+00 1.00770757e+00 4.78365194e-01 -4.30539559e+00 -3.60022149e+00 -8.65515988e-01 -5.83048102e-01 4.53639338e+00 6.44569544e+00 3.71469363e+00 -2.75227423e+00 2.23745581e+00 3.85385633e+00 1.48130863e+00 -6.23956033e+00 6.83810546e-02 2.44858458e-01 -4.34518614e+00 1.44744222e+00 1.88205382e+00 5.27120036e+00 7.44400206e-01 8.99714137e-01 2.09063883e+00 -5.28865947e+00 -8.09633400e-01 1.27625564e+00 4.79988356e+00 8.19155278e-01 3.68208892e+00 1.26780634e+00 1.17682360e+00 5.98593627e+00 -5.65649076e-01 4.83650761e-01 5.13142536e+00 4.14504631e+00 5.04869585e-01 -2.44534045e+00 3.21096518e+00 5.29263937e+00 -2.23528406e+00 2.59670050e+00 2.16874596e+00 3.51800389e+00 -5.97573310e+00 9.26074337e-01 -2.00723781e+00 -2.19934535e+00 2.52996294e+00 -1.25881072e-01 3.57934731e+00 -2.50456288e+00 1.68706114e+00 -1.66449420e+00 1.59899358e+00 -6.48016113e+00 1.71885692e+00 2.02567845e+00 -5.66839500e+00 2.45249402e+00 2.70429690e+00 1.81268150e+00 1.59944270e+00 3.67533901e-01 1.31971207e+00 -1.14909051e+00 4.92981523e+00 4.15988477e+00 4.15600174e+00 5.06034038e-01 3.53614437e+00 2.25178017e+00 -2.51944730e-01 2.85168077e+00 2.76007215e+00 -2.44488107e+00 1.32053212e+00 1.96437424e+00 -2.69178486e-01 2.70194895e+00 -4.51703843e-01 1.78589046e+00 1.51991366e+00 1.29347969e+00 -3.00117389e+00 2.18734789e+00 -8.96416969e-01 1.32241288e+00 2.72168783e+00 3.01367338e-01 3.13161148e+00 -3.08748600e-01 -3.23025157e+00 2.26724498e+00 -6.89764715e-01 -2.13991295e+00 -3.76996501e+00 6.56993162e-01 -1.97780333e-01 -2.61160386e-01 -1.67358713e+00 6.27330290e+00 -8.30934332e-01 -4.57022080e+00 -1.81129710e+00 3.50157303e-01 -1.20220020e+00 4.61066366e+00 1.50607962e-01 -2.45890769e-01 -6.13112914e-01 -2.17664423e+00 1.11517934e+00 7.46394436e-01 -8.88178707e-01 1.67962586e+00 2.47140427e+00 -7.74972435e-01 2.71087856e-01 1.16371752e-02 -5.48279067e+00 1.17901014e+00 2.96793752e+00 -3.25213160e+00 2.50965173e-01 -3.34579950e-01 -7.82811382e-01 -4.82679479e-01 1.24618659e+00 2.21065405e+00]]
time: 1min 46s (started: 2024-04-25 21:55:15 -07:00)
In [ ]:
pipe = make_pipeline(
TruncatedSVD(n_components=10),
memory="cache_svd_n")
grid = GridSearchCV(
pipe,
param_grid={
"truncatedsvd__n_components":np.random.randint(20, 500, 20),
},
scoring='explained_variance',
cv=5,
refit=False,
n_jobs=-1)
print(pipe)
grid.fit(X_fill_zero[:,1:])
2.2.1.2 Principle Component Analysis¶
In [ ]:
from sklearn.decomposition import PCA
plt.rcParams["font.family"] = "DejaVu Serif"
covar_matrix = PCA(n_components = len(X_fill_zero.columns)-1) #start from the number of all features we have
covar_matrix.fit(X_fill_zero[:,1:])
plt.ylabel( 'Eigenvalues')
plt.xlabel('# of Features')
plt.title('PCA Eigenvalues')
plt.ylim(0,max(covar_matrix.explained_variance_))
plt.style.context ('seaborn-whitegrid')
plt.plot(covar_matrix.explained_variance_)
plt.show()
time: 8.57 s (started: 2024-04-25 18:52:20 -07:00)
In [ ]:
from sklearn.decomposition import PCA
covar_matrix = PCA(n_components = 50) #components are equal to the number of features we have
covar_matrix.fit(X_fill_zero[:,1:])
plt.ylabel( 'Eigenvalues')
plt.xlabel('# of Features')
plt.title('PCA Eigenvalues')
plt.ylim(0,max(covar_matrix.explained_variance_))
plt.style.context ('seaborn-whitegrid')
plt.axvline(x=8, color='r', linestyle='--')
plt.plot(covar_matrix.explained_variance_)
plt.show()
time: 3.92 s (started: 2024-04-25 18:54:46 -07:00)
In [ ]:
from sklearn.decomposition import PCA
covar_matrix = PCA(n_components = 10) #components are equal to the number of features we have
covar_matrix.fit(X_fill_zero[:,1:])
kn = KneeLocator(range(0,10), covar_matrix.explained_variance_, curve='convex', direction='decreasing')
print(f"{kn.knee=}")
plt.ylabel( 'Eigenvalues')
plt.xlabel('# of Features')
plt.title('PCA Eigenvalues')
plt.ylim(0,max(covar_matrix.explained_variance_))
plt.style.context ('seaborn-whitegrid')
plt.axvline(x=kn.knee, color='r', linestyle='--')
plt.plot(covar_matrix.explained_variance_)
plt.show()
kn.knee=2
time: 2.32 s (started: 2024-04-25 19:00:01 -07:00)
In [ ]:
from kneed import KneeLocator
kn = KneeLocator(range(0,40), covar_matrix.explained_variance_, curve='convex', direction='decreasing')
print(f"{kn.knee=}")
6
time: 3.24 ms (started: 2024-04-25 18:54:19 -07:00)
In [ ]:
from kneed import KneeLocator
kn = KneeLocator(range(0,101), covar_matrix.explained_variance_, curve='convex', direction='decreasing')
print(f"{kn.knee=}")
8
time: 2.47 ms (started: 2024-04-25 18:46:08 -07:00)
In [ ]:
k = 60
from yellowbrick.cluster import silhouette_visualizer
visualizer = silhouette_visualizer(KMeans(60, random_state=42), X_fill_zero[:,1:])
time: 2min 1s (started: 2024-04-25 18:57:43 -07:00)
In [ ]:
k = 2
from yellowbrick.cluster import silhouette_visualizer
visualizer = silhouette_visualizer(KMeans(2, random_state=42), X_fill_zero[:,1:])
time: 1min 59s (started: 2024-04-25 19:00:32 -07:00)
In [ ]:
k = 3
from yellowbrick.cluster import silhouette_visualizer
visualizer = silhouette_visualizer(KMeans(3, random_state=42), X_fill_zero[:,1:])
time: 1min 59s (started: 2024-04-25 19:05:32 -07:00)
In [ ]:
k = 4
from yellowbrick.cluster import silhouette_visualizer
visualizer = silhouette_visualizer(KMeans(4, random_state=42), X_fill_zero[:,1:])
time: 1min 56s (started: 2024-04-25 19:07:55 -07:00)
In [ ]:
k = 5
from yellowbrick.cluster import silhouette_visualizer
visualizer = silhouette_visualizer(KMeans(5, random_state=42), X_fill_zero[:,1:])
time: 1min 55s (started: 2024-04-25 19:10:25 -07:00)
In [ ]:
k = 6
from yellowbrick.cluster import silhouette_visualizer
visualizer = silhouette_visualizer(KMeans(6, random_state=42), X_fill_zero[:,1:])
time: 1min 54s (started: 2024-04-25 19:12:26 -07:00)
In [ ]:
k = 7
from yellowbrick.cluster import silhouette_visualizer
visualizer = silhouette_visualizer(KMeans(7, random_state=42), X_fill_zero[:,1:])
time: 1min 55s (started: 2024-04-25 19:14:27 -07:00)
In [ ]:
k = 8
from yellowbrick.cluster import silhouette_visualizer
visualizer = silhouette_visualizer(KMeans(8, random_state=42), X_fill_zero[:,1:])
time: 1min 57s (started: 2024-04-25 19:02:37 -07:00)
2.2.2 Fill in Missing Values¶
In [ ]:
X_fill_mean = X.fill_null(strategy='mean')
X_fill_zero = X.fill_null(strategy='zero')
X_fill_min = X.fill_null(strategy='min')
time: 236 ms (started: 2024-04-25 15:33:15 -07:00)
In [ ]:
X_fill_mean
shape: (56_318, 5_980)
| userId | 79132 | 2571 | 318 | 58559 | 2959 | 7153 | 4993 | 356 | 109487 | 5952 | 296 | 260 | 593 | 99114 | 1196 | 60069 | 68954 | 858 | 134130 | 91529 | 527 | 1198 | 68157 | 3578 | 72998 | 106782 | 112852 | 59315 | 4226 | 6539 | 6874 | 4306 | 33794 | 6377 | 74458 | 1704 | … | 6770 | 8928 | 64278 | 66304 | 98956 | 99106 | 116660 | 118760 | 138104 | 141648 | 158956 | 192109 | 193954 | 205857 | 281904 | 522 | 1081 | 2946 | 32792 | 55946 | 85790 | 93819 | 94867 | 97194 | 116668 | 169252 | 203619 | 208807 | 224983 | 81 | 3952 | 7976 | 67620 | 115967 | 127134 | 185997 | 251922 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| cat | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | … | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 |
| "175325" | 4.0 | 4.0 | 4.5 | 4.5 | 3.5 | 4.0 | 4.0 | 3.5 | 4.0 | 4.0 | 4.5 | 4.0 | 4.0 | 4.0 | 4.0 | 4.0 | 4.0 | 4.5 | 4.5 | 4.0 | 4.5 | 4.0 | 4.0 | 3.5 | 3.5 | 4.0 | 4.0 | 3.5 | 4.0 | 4.0 | 4.0 | 3.5 | 4.0 | 4.0 | 4.0 | 4.5 | … | 3.0 | 3.5 | 3.5 | 2.524752 | 4.0 | 2.5 | 3.5 | 4.0 | 3.4075 | 3.0 | 2.853535 | 3.5 | 3.0 | 3.310945 | 3.255051 | 4.0 | 3.5 | 3.5 | 5.0 | 3.0 | 4.0 | 2.5 | 2.5 | 3.87 | 3.5 | 4.0 | 3.5 | 3.4125 | 3.253769 | 1.5 | 3.0 | 2.0 | 3.492424 | 3.0 | 2.0 | 3.0 | 2.645729 |
| "22744" | 4.0 | 5.0 | 3.0 | 5.0 | 5.0 | 5.0 | 5.0 | 4.0 | 4.0 | 5.0 | 5.0 | 5.0 | 5.0 | 3.0 | 5.0 | 4.0 | 4.0 | 5.0 | 4.0 | 5.0 | 0.5 | 5.0 | 3.0 | 3.0 | 3.0 | 4.0 | 4.0 | 4.0 | 4.0 | 4.0 | 5.0 | 5.0 | 5.0 | 5.0 | 3.0 | 3.0 | … | 3.604061 | 3.3575 | 3.85 | 2.5 | 3.645078 | 4.0 | 3.533854 | 3.638191 | 2.0 | 2.0 | 1.0 | 3.8925 | 0.5 | 3.310945 | 3.255051 | 0.5 | 1.0 | 2.0 | 3.870466 | 0.5 | 3.387755 | 3.28 | 3.0 | 3.87 | 2.0 | 3.992386 | 3.6675 | 3.4125 | 3.253769 | 2.0 | 3.407035 | 3.199482 | 3.492424 | 2.0 | 2.0 | 3.0 | 2.645729 |
| "17035" | 1.5 | 1.5 | 5.0 | 4.5 | 4.5 | 4.0 | 3.0 | 3.5 | 2.0 | 3.5 | 5.0 | 0.5 | 5.0 | 4.0 | 0.5 | 1.0 | 4.0 | 5.0 | 4.0 | 0.5 | 4.5 | 4.5 | 4.5 | 4.5 | 1.5 | 4.0 | 0.5 | 4.0 | 4.0 | 3.688088 | 4.5 | 4.0 | 0.5 | 4.0 | 4.0 | 4.0 | … | 2.0 | 1.0 | 3.85 | 0.5 | 3.645078 | 3.5 | 3.533854 | 0.5 | 3.4075 | 0.5 | 2.853535 | 3.8925 | 2.780612 | 2.5 | 3.255051 | 2.0 | 4.5 | 3.435567 | 3.870466 | 0.5 | 2.5 | 3.0 | 3.5 | 3.87 | 3.206468 | 3.992386 | 3.6675 | 3.5 | 3.253769 | 4.0 | 3.5 | 3.199482 | 3.5 | 2.5 | 3.5 | 2.721939 | 2.645729 |
| "15875" | 3.0 | 5.0 | 5.0 | 4.0 | 5.0 | 4.0 | 4.0 | 3.5 | 4.0 | 4.0 | 5.0 | 4.0 | 3.5 | 4.5 | 4.5 | 3.0 | 3.5 | 4.0 | 5.0 | 4.0 | 4.0 | 4.0 | 5.0 | 2.5 | 3.0 | 4.0 | 4.0 | 4.0 | 5.0 | 3.0 | 4.0 | 4.0 | 3.5 | 3.5 | 4.0 | 2.5 | … | 3.0 | 3.0 | 3.85 | 2.524752 | 3.645078 | 2.78 | 3.533854 | 3.638191 | 3.4075 | 3.194737 | 2.5 | 4.0 | 2.0 | 3.310945 | 3.255051 | 4.0 | 3.0 | 3.435567 | 3.0 | 2.756219 | 3.387755 | 3.0 | 1.5 | 3.87 | 3.5 | 3.992386 | 3.6675 | 3.4125 | 3.253769 | 3.0 | 1.5 | 3.5 | 3.492424 | 3.0 | 3.25 | 2.721939 | 2.645729 |
| "43703" | 2.5 | 5.0 | 3.5 | 4.0 | 3.5 | 3.0 | 2.5 | 2.5 | 3.5 | 2.5 | 3.5 | 3.5 | 5.0 | 3.5 | 5.0 | 4.0 | 3.5 | 4.0 | 2.5 | 3.5 | 3.5 | 3.5 | 4.0 | 2.5 | 2.0 | 4.0 | 2.5 | 3.5 | 3.5 | 3.0 | 3.5 | 3.0 | 3.5 | 2.5 | 3.0 | 3.5 | … | 1.0 | 3.0 | 3.0 | 1.5 | 3.645078 | 2.78 | 3.533854 | 3.638191 | 3.4075 | 3.194737 | 2.853535 | 1.5 | 2.780612 | 1.5 | 3.255051 | 2.0 | 3.510417 | 3.435567 | 2.0 | 2.756219 | 2.5 | 2.0 | 2.95202 | 3.87 | 2.5 | 3.992386 | 2.0 | 3.4125 | 2.0 | 3.035 | 3.407035 | 3.199482 | 3.492424 | 2.0 | 3.25 | 2.721939 | 1.5 |
| … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … |
| "114825" | 4.132816 | 4.11884 | 4.5 | 4.0 | 4.5 | 4.03331 | 4.028187 | 4.076231 | 4.112761 | 4.01789 | 4.226428 | 3.957079 | 4.0 | 4.008424 | 4.046046 | 3.978485 | 5.0 | 5.0 | 4.006142 | 3.933931 | 4.08114 | 3.934357 | 4.028735 | 3.956386 | 3.512107 | 3.918774 | 3.897736 | 3.749119 | 4.083559 | 3.688088 | 3.802861 | 3.59861 | 3.869268 | 3.739308 | 3.991113 | 4.135534 | … | 3.604061 | 3.3575 | 3.85 | 2.524752 | 3.645078 | 2.78 | 3.533854 | 3.638191 | 3.4075 | 3.194737 | 2.853535 | 3.8925 | 2.780612 | 3.310945 | 3.255051 | 3.382653 | 3.510417 | 3.435567 | 3.870466 | 2.756219 | 3.387755 | 3.28 | 2.95202 | 3.87 | 3.206468 | 3.992386 | 3.6675 | 3.4125 | 3.253769 | 3.035 | 3.407035 | 3.199482 | 3.492424 | 3.477273 | 3.25 | 2.721939 | 2.645729 |
| "60686" | 4.0 | 4.5 | 4.328511 | 4.0 | 5.0 | 4.03331 | 4.028187 | 4.5 | 4.112761 | 4.01789 | 5.0 | 2.0 | 5.0 | 4.0 | 4.046046 | 3.978485 | 3.901311 | 4.219631 | 4.006142 | 3.5 | 5.0 | 3.934357 | 4.0 | 4.0 | 2.5 | 3.918774 | 3.897736 | 3.5 | 4.083559 | 3.688088 | 4.5 | 3.59861 | 3.869268 | 3.739308 | 4.5 | 4.5 | … | 3.604061 | 3.3575 | 3.85 | 2.524752 | 3.645078 | 2.78 | 3.533854 | 3.638191 | 3.4075 | 3.194737 | 2.853535 | 3.8925 | 2.780612 | 3.310945 | 3.255051 | 3.382653 | 3.510417 | 3.435567 | 3.870466 | 2.756219 | 3.387755 | 3.28 | 2.95202 | 3.87 | 3.206468 | 3.992386 | 3.6675 | 3.4125 | 3.253769 | 3.035 | 3.407035 | 3.199482 | 3.492424 | 3.477273 | 3.25 | 2.721939 | 2.645729 |
| "71434" | 4.132816 | 4.11884 | 4.328511 | 4.133716 | 4.218658 | 4.03331 | 4.028187 | 4.076231 | 4.112761 | 4.01789 | 5.0 | 4.5 | 5.0 | 4.008424 | 4.046046 | 3.978485 | 3.901311 | 5.0 | 4.006142 | 3.933931 | 5.0 | 3.934357 | 4.028735 | 3.956386 | 3.512107 | 3.918774 | 3.897736 | 3.749119 | 5.0 | 3.688088 | 3.802861 | 3.59861 | 3.869268 | 3.739308 | 3.991113 | 4.135534 | … | 3.604061 | 3.3575 | 3.85 | 2.524752 | 3.645078 | 2.78 | 3.533854 | 3.638191 | 3.4075 | 3.194737 | 2.853535 | 3.8925 | 2.780612 | 3.310945 | 3.255051 | 3.382653 | 3.510417 | 3.435567 | 3.870466 | 2.756219 | 3.387755 | 3.28 | 2.95202 | 3.87 | 3.206468 | 3.992386 | 3.6675 | 3.4125 | 3.253769 | 3.035 | 3.407035 | 3.199482 | 3.492424 | 3.477273 | 3.25 | 2.721939 | 2.645729 |
| "57670" | 4.5 | 4.5 | 4.328511 | 4.133716 | 4.218658 | 4.03331 | 4.028187 | 4.076231 | 4.5 | 4.01789 | 4.226428 | 4.5 | 4.082426 | 4.008424 | 5.0 | 5.0 | 4.5 | 4.219631 | 4.5 | 3.933931 | 4.08114 | 4.0 | 4.028735 | 3.956386 | 4.0 | 3.918774 | 4.5 | 4.5 | 4.083559 | 3.688088 | 3.802861 | 3.59861 | 3.869268 | 3.739308 | 3.991113 | 4.135534 | … | 3.604061 | 3.3575 | 3.85 | 2.524752 | 3.645078 | 2.78 | 3.533854 | 3.638191 | 3.4075 | 3.194737 | 2.853535 | 3.8925 | 2.780612 | 3.310945 | 3.255051 | 3.382653 | 3.510417 | 3.435567 | 3.870466 | 2.756219 | 3.387755 | 3.28 | 2.95202 | 3.87 | 3.206468 | 3.992386 | 3.6675 | 3.4125 | 3.253769 | 3.035 | 3.407035 | 3.199482 | 3.492424 | 3.477273 | 3.25 | 2.721939 | 2.645729 |
| "121244" | 4.132816 | 2.5 | 4.5 | 4.133716 | 4.218658 | 4.03331 | 4.028187 | 4.076231 | 4.112761 | 4.01789 | 4.226428 | 3.957079 | 4.082426 | 4.008424 | 4.046046 | 3.978485 | 3.901311 | 4.219631 | 4.006142 | 3.933931 | 4.08114 | 3.934357 | 4.028735 | 3.956386 | 3.512107 | 3.918774 | 3.897736 | 3.749119 | 4.083559 | 3.688088 | 3.802861 | 3.59861 | 3.869268 | 3.739308 | 3.991113 | 4.135534 | … | 3.604061 | 3.3575 | 3.85 | 2.524752 | 3.645078 | 2.78 | 3.533854 | 3.638191 | 3.4075 | 3.194737 | 2.853535 | 3.8925 | 2.780612 | 3.310945 | 3.255051 | 3.382653 | 3.510417 | 3.435567 | 3.870466 | 2.756219 | 3.387755 | 2.5 | 2.95202 | 3.87 | 3.206468 | 3.992386 | 3.6675 | 3.4125 | 3.253769 | 3.035 | 3.407035 | 3.199482 | 3.492424 | 3.477273 | 3.25 | 2.721939 | 2.645729 |
time: 38.1 ms (started: 2024-04-25 13:45:04 -07:00)
In [ ]:
# it is hard to explain why you impute these values. so leave it for now.
time: 174 µs (started: 2024-04-25 16:33:20 -07:00)
In [ ]:
# Mean imputer
from sklearn.impute import SimpleImputer
imp_mean = SimpleImputer(missing_values=np.nan, strategy='mean')
df_mean_imputed = pd.DataFrame(imp_mean.fit_transform(df_with_missing),columns = df_with_missing.columns)
time: 8.19 s (started: 2024-04-24 18:15:24 -07:00)
In [ ]:
# Most Frequent imputer
from sklearn.impute import SimpleImputer
imp_mfreq = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
df_mfreq_imputed = pd.DataFrame(imp_mfreq.fit_transform(df_with_missing))
time: 8.41 s (started: 2024-04-24 18:15:32 -07:00)
All the advanced imputer methods takes weeks to run.¶
In [ ]:
# KNN imputer
from sklearn.impute import KNNImputer
imputer = KNNImputer(n_neighbors=5)
df_knn_imputed = pd.DataFrame(imputer.fit_transform(df_with_missing),columns = df_with_missing.columns)
In [ ]:
# quick and dirty. fill in movie mean + random user factor
import miceforest as mf
kds = mf.ImputationKernel(
df_with_missing,
datasets=1,
save_all_iterations=False,
random_state=420,
)
kds.mice(1)
df_mice_forest_imputed = kds.complete_data()
In [ ]:
dir()
In [ ]:
In [ ]:
In [ ]:
2.3 KMeans Clustering¶
In [ ]:
# From SVD, we have U as the User Embeddings.
# From Imputation, we have X_fill_mean as the User Embeddings.
# From LLMs, we have U_embed as the User Embeddings.
# Then, we can run either KMeans or UMAP+HDBSCAN on these UEs to get User Clusters.
In [ ]:
U.shape
Out[ ]:
(56318, 10)
time: 1.19 ms (started: 2024-04-26 11:54:43 -07:00)
In [ ]:
kmeans = KMeans(n_clusters=10, random_state=0)
# Fit the KMeans algorithm to the 'U' matrix
kmeans.fit(U)
centroids = kmeans.cluster_centers_
labels = kmeans.labels_
print(f"{centroids.shape=}")
print(f"{labels[:10]=}")
centroids.shape=(10, 10)
labels[:10]=array([1, 1, 1, 1, 1, 1, 1, 1, 9, 1], dtype=int32)
time: 835 ms (started: 2024-04-26 11:58:51 -07:00)
In [ ]:
labels.shape # should be 56318x1. plug it back in X to get avg loading for movies
Out[ ]:
(56318,)
time: 1.26 ms (started: 2024-04-26 12:02:07 -07:00)
In [ ]:
np.array(np.unique(labels, return_counts=True)).T
Out[ ]:
array([[ 0, 13843],
[ 1, 262],
[ 2, 2315],
[ 3, 2158],
[ 4, 21798],
[ 5, 876],
[ 6, 5820],
[ 7, 1286],
[ 8, 7293],
[ 9, 667]])
time: 1.86 ms (started: 2024-04-26 12:04:45 -07:00)
In [ ]:
# plug labels back to get movie-cluster loadings
X_L = X.with_columns(
pl.Series('labels', labels)
).group_by('labels', maintain_order=True).mean().select(
pl.exclude('userId')
)
X_L
Out[ ]:
shape: (10, 5_980)
| labels | 79132 | 2571 | 318 | 58559 | 2959 | 7153 | 4993 | 356 | 109487 | 5952 | 296 | 260 | 593 | 99114 | 1196 | 60069 | 68954 | 858 | 134130 | 91529 | 527 | 1198 | 68157 | 3578 | 72998 | 106782 | 112852 | 59315 | 4226 | 6539 | 6874 | 4306 | 33794 | 6377 | 74458 | 1704 | … | 6770 | 8928 | 64278 | 66304 | 98956 | 99106 | 116660 | 118760 | 138104 | 141648 | 158956 | 192109 | 193954 | 205857 | 281904 | 522 | 1081 | 2946 | 32792 | 55946 | 85790 | 93819 | 94867 | 97194 | 116668 | 169252 | 203619 | 208807 | 224983 | 81 | 3952 | 7976 | 67620 | 115967 | 127134 | 185997 | 251922 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| i32 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | … | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 |
| 1 | 4.0 | 4.40856 | 4.37247 | 4.2607 | 4.28937 | 4.010246 | 4.058 | 4.012048 | 3.91453 | 3.97561 | 4.373518 | 4.243083 | 4.331395 | 3.909091 | 4.301205 | 3.940265 | 3.798206 | 4.245495 | 3.835586 | 3.784585 | 4.118605 | 4.363636 | 3.902834 | 4.037402 | 3.487854 | 3.832558 | 4.044355 | 3.918 | 4.165975 | 3.753968 | 4.011719 | 3.672131 | 3.982213 | 3.714583 | 3.652542 | 3.913717 | … | 3.275 | 3.145161 | 3.5625 | 2.535714 | 2.75 | 2.709677 | 2.0 | 2.636364 | 3.340909 | 2.625 | 3.064516 | 3.25 | 2.710526 | 2.970588 | 3.416667 | 3.028571 | 3.322581 | 3.22 | 3.53125 | 2.470588 | 3.068182 | 3.272727 | 3.048077 | 4.178571 | 3.155556 | 3.5 | 2.722222 | 3.388889 | 3.027778 | 2.945652 | 3.518868 | 2.913043 | 3.603448 | 3.297872 | 3.227273 | 3.157895 | 2.543478 |
| 9 | 4.374224 | 4.48367 | 4.472696 | 4.412461 | 4.30737 | 4.263072 | 4.225884 | 4.189097 | 4.267537 | 4.22578 | 4.159785 | 4.073785 | 4.050645 | 4.116838 | 4.096257 | 3.979242 | 3.816148 | 4.246795 | 4.194728 | 4.216216 | 4.110738 | 4.038321 | 3.97265 | 4.326264 | 3.958991 | 3.976407 | 4.063796 | 4.078906 | 4.116959 | 3.935877 | 3.85274 | 3.734601 | 4.166933 | 3.759528 | 3.994755 | 4.145833 | … | 3.222222 | 3.428571 | 3.75 | 2.9375 | 3.318182 | 2.659091 | 2.25 | 3.375 | 3.526316 | 3.184211 | 3.166667 | 2.666667 | 2.8125 | 2.545455 | 2.583333 | 3.416667 | 3.0 | 2.75 | 4.5 | 3.267857 | 3.0 | 2.909091 | 2.857143 | 3.678571 | 3.178571 | 3.166667 | 2.666667 | 3.291667 | 3.092593 | 3.263158 | 3.5 | 3.0 | 3.369565 | 3.263158 | 3.571429 | 2.5 | 2.744898 |
| 5 | 3.951469 | 4.143125 | 4.234203 | 4.07098 | 4.216733 | 3.921371 | 4.013123 | 3.857326 | 3.767459 | 3.938596 | 4.280078 | 4.114489 | 4.250319 | 3.878066 | 4.208995 | 3.899852 | 3.786963 | 4.198738 | 3.788462 | 3.605442 | 3.95572 | 4.137281 | 3.882759 | 3.692946 | 3.203366 | 3.728785 | 3.778231 | 3.572751 | 3.953148 | 3.540802 | 3.898385 | 3.472033 | 3.676 | 3.66404 | 3.668971 | 3.880435 | … | 3.583333 | 3.244186 | 3.869565 | 1.703704 | 3.416667 | 2.521739 | 3.214286 | 3.384615 | 3.038462 | 3.25 | 2.386364 | 3.9375 | 2.365385 | 3.057692 | 3.227273 | 3.40625 | 3.220588 | 3.3 | 3.657895 | 2.431034 | 3.216216 | 3.147059 | 2.928571 | 3.989362 | 3.128205 | 3.9 | 3.526316 | 3.26 | 2.653846 | 3.026316 | 3.107143 | 3.038462 | 3.277778 | 3.625 | 2.880952 | 2.442308 | 2.264706 |
| 7 | 3.730354 | 3.839698 | 4.019305 | 3.788321 | 4.000476 | 3.796748 | 3.809051 | 3.68019 | 3.676234 | 3.755543 | 4.339982 | 3.863181 | 4.072336 | 3.782511 | 3.9449 | 3.879375 | 3.757656 | 4.502269 | 3.525937 | 3.375661 | 4.063901 | 3.865764 | 3.884908 | 3.498733 | 2.864106 | 3.671037 | 3.326014 | 3.081487 | 4.019151 | 3.183942 | 3.727477 | 3.28777 | 3.484564 | 3.597143 | 3.648005 | 3.778055 | … | 3.65 | 3.348837 | 4.053846 | 1.833333 | 3.181818 | 2.5 | 3.323529 | 3.8125 | 2.166667 | 2.384615 | 1.666667 | 4.132653 | 2.75 | 3.171053 | 3.173077 | 3.321429 | 3.546875 | 3.45122 | 3.893939 | 3.0 | 3.724638 | 3.166667 | 3.25 | 4.0 | 2.888889 | 4.03125 | 3.833333 | 3.316667 | 3.0 | 2.583333 | 3.764706 | 2.708333 | 3.5 | 3.5 | 3.277778 | 2.928571 | 1.0 |
| 2 | 4.319876 | 4.378527 | 4.416391 | 4.315249 | 4.309292 | 4.187888 | 4.158518 | 4.190753 | 4.208621 | 4.14554 | 4.119231 | 4.017411 | 4.078606 | 4.060111 | 4.058716 | 3.972491 | 3.837855 | 4.113806 | 4.127123 | 4.08979 | 4.140316 | 3.96871 | 3.977286 | 4.181915 | 3.759015 | 3.951284 | 3.994916 | 3.952915 | 4.07766 | 3.861793 | 3.755793 | 3.621359 | 4.045024 | 3.696608 | 4.051259 | 4.140591 | … | 3.472222 | 3.6 | 4.142857 | 2.5 | 3.560606 | 2.770833 | 3.46875 | 3.642857 | 3.242857 | 3.1 | 2.797297 | 3.166667 | 2.875 | 3.227273 | 3.0 | 3.615385 | 2.7 | 3.111111 | 4.25 | 2.824324 | 3.125 | 3.111111 | 2.833333 | 3.833333 | 3.416667 | 3.0 | 3.428571 | 3.176471 | 3.342105 | 3.0 | 3.325 | 3.52 | 3.55 | 3.464286 | 3.25 | 2.543478 | 2.77027 |
| 3 | 3.845888 | 4.064403 | 4.235414 | 3.985607 | 4.117811 | 3.872789 | 3.90515 | 3.852941 | 3.840451 | 3.872852 | 4.17204 | 4.010347 | 4.163093 | 3.859091 | 4.113242 | 3.813718 | 3.767078 | 4.079365 | 3.774575 | 3.628505 | 3.896067 | 4.024083 | 3.840262 | 3.64818 | 3.083072 | 3.719661 | 3.807364 | 3.51514 | 3.952791 | 3.487327 | 3.806662 | 3.52854 | 3.660504 | 3.652157 | 3.708262 | 3.883149 | … | 3.552632 | 3.225 | 3.571429 | 2.217391 | 3.0 | 3.36 | 3.642857 | 3.75 | 3.368421 | 2.958333 | 2.75 | 3.441176 | 2.566667 | 3.392857 | 3.144737 | 3.295455 | 3.409091 | 4.017857 | 3.583333 | 1.722222 | 3.178571 | 3.285714 | 2.897436 | 3.833333 | 3.242857 | 3.846154 | 3.75 | 3.352941 | 3.0 | 2.9 | 2.764706 | 3.285714 | 2.5 | 3.40625 | 3.25 | 2.975 | 1.958333 |
| 6 | 4.295302 | 4.272826 | 4.412418 | 4.237551 | 4.271272 | 4.096102 | 4.082311 | 4.142607 | 4.222386 | 4.061322 | 4.165673 | 3.924207 | 4.071086 | 4.03984 | 4.010932 | 3.972029 | 3.866807 | 4.158256 | 4.118884 | 4.058869 | 4.088632 | 3.909091 | 4.034478 | 4.139023 | 3.744676 | 3.975093 | 3.950496 | 3.897311 | 4.094394 | 3.817641 | 3.715494 | 3.584189 | 3.969184 | 3.673503 | 4.053136 | 4.190691 | … | 3.714286 | 3.666667 | 3.208333 | 2.552632 | 3.629032 | 2.666667 | 3.65625 | 3.666667 | 3.575 | 3.578125 | 2.777778 | 4.214286 | 3.225 | 3.5 | 3.375 | 3.363636 | 3.4 | 2.833333 | 3.25 | 3.08 | 2.6 | 3.818182 | 3.038462 | 3.222222 | 3.0 | 2.928571 | 3.0 | 3.5 | 3.5 | 3.357143 | 3.434783 | 3.15 | 3.869565 | 3.659091 | 3.4 | 3.045455 | 3.0 |
| 4 | 3.958701 | 3.830385 | 4.043483 | 3.94634 | 3.972787 | 3.867537 | 3.850245 | 3.849648 | 4.010293 | 3.867213 | 4.038576 | 3.690628 | 3.815203 | 3.895341 | 3.774429 | 3.916583 | 3.894775 | 3.976823 | 3.978156 | 3.798644 | 3.734758 | 3.601564 | 3.920548 | 3.629735 | 3.465972 | 3.793473 | 3.930318 | 3.779603 | 3.87114 | 3.718121 | 3.678909 | 3.619175 | 3.695165 | 3.757506 | 3.83805 | 4.038248 | … | 3.907895 | 3.586207 | 3.628571 | 3.193878 | 4.057143 | 2.768293 | 3.837662 | 3.848214 | 3.611111 | 3.56383 | 2.892857 | 4.009615 | 2.830645 | 3.727273 | 3.271429 | 3.633333 | 4.039474 | 3.695652 | 4.038462 | 2.571429 | 3.453125 | 3.475 | 2.617647 | 3.55 | 3.464286 | 4.351351 | 3.855556 | 3.433333 | 3.48913 | 2.9 | 3.5 | 3.554054 | 3.633333 | 3.653846 | 3.244186 | 2.666667 | 2.608696 |
| 0 | 4.340546 | 4.20889 | 4.516312 | 4.292983 | 4.420844 | 4.117939 | 4.094925 | 4.308089 | 4.296029 | 4.086756 | 4.354976 | 3.790104 | 4.187628 | 4.162293 | 3.865264 | 4.087312 | 3.989047 | 4.312539 | 4.038957 | 4.12249 | 4.295582 | 3.725758 | 4.220837 | 4.10744 | 3.58329 | 4.094983 | 3.775394 | 3.602766 | 4.229718 | 3.613816 | 3.934152 | 3.613015 | 3.984757 | 3.787655 | 4.215424 | 4.273211 | … | 3.592593 | 4.0 | 4.5 | 1.625 | 3.897436 | 3.666667 | 3.690476 | 4.2 | 4.083333 | 3.208333 | 2.666667 | 4.0 | 2.75 | 3.1 | 3.727273 | 3.722222 | 3.0 | 3.142857 | 4.0 | 3.375 | 3.0 | 3.875 | 3.166667 | 3.636364 | 3.714286 | 4.295455 | 4.0 | 3.96875 | 3.225 | 4.0 | 4.0 | 3.25 | 3.857143 | 3.666667 | 3.0 | 1.25 | 2.5 |
| 8 | 3.95517 | 4.217858 | 4.345401 | 4.075818 | 4.135966 | 4.140683 | 4.180917 | 4.055403 | 3.908564 | 4.142692 | 4.289042 | 4.368097 | 4.132487 | 3.86259 | 4.417532 | 4.016213 | 3.933724 | 4.31702 | 3.993248 | 3.79918 | 4.063517 | 4.283459 | 3.927787 | 3.926336 | 3.319732 | 3.789293 | 3.972625 | 3.771765 | 4.083798 | 3.657728 | 3.828834 | 3.63146 | 3.875 | 3.792329 | 3.741379 | 4.134059 | … | 3.5 | 3.333333 | 3.888889 | 2.4 | 3.8 | 2.5 | 2.8 | 0.5 | 4.333333 | 1.8 | 2.5 | 3.833333 | 3.0 | 3.458333 | 3.857143 | 4.1 | 3.96875 | 3.1875 | 4.5 | 2.7 | 3.25 | 3.5 | 3.833333 | 4.0 | 3.5 | 4.363636 | 3.78125 | 4.5 | 3.5 | 3.7 | 3.8 | 3.833333 | 2.0 | 3.833333 | 3.5 | 3.375 | 3.0 |
time: 83.2 ms (started: 2024-04-26 12:15:18 -07:00)
In [ ]:
X_L.write_csv('../data/X_L_movie_loadings_for_10_groups.csv')
time: 40.4 ms (started: 2024-04-26 12:12:16 -07:00)
In [ ]:
M_C_L = X_L.with_columns(
pl.col('labels').cast(pl.Utf8)
).transpose(include_header=True, header_name='movieId', column_names='labels')
M_C_L.write_csv('../data/M_C_L_movie_cluster_loadings_5979x11.csv')
M_C_L
Out[ ]:
shape: (5_979, 11)
| movieId | 1 | 9 | 5 | 7 | 2 | 3 | 6 | 4 | 0 | 8 |
|---|---|---|---|---|---|---|---|---|---|---|
| str | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 |
| "79132" | 4.0 | 4.374224 | 3.951469 | 3.730354 | 4.319876 | 3.845888 | 4.295302 | 3.958701 | 4.340546 | 3.95517 |
| "2571" | 4.40856 | 4.48367 | 4.143125 | 3.839698 | 4.378527 | 4.064403 | 4.272826 | 3.830385 | 4.20889 | 4.217858 |
| "318" | 4.37247 | 4.472696 | 4.234203 | 4.019305 | 4.416391 | 4.235414 | 4.412418 | 4.043483 | 4.516312 | 4.345401 |
| "58559" | 4.2607 | 4.412461 | 4.07098 | 3.788321 | 4.315249 | 3.985607 | 4.237551 | 3.94634 | 4.292983 | 4.075818 |
| "2959" | 4.28937 | 4.30737 | 4.216733 | 4.000476 | 4.309292 | 4.117811 | 4.271272 | 3.972787 | 4.420844 | 4.135966 |
| … | … | … | … | … | … | … | … | … | … | … |
| "67620" | 3.603448 | 3.369565 | 3.277778 | 3.5 | 3.55 | 2.5 | 3.869565 | 3.633333 | 3.857143 | 2.0 |
| "115967" | 3.297872 | 3.263158 | 3.625 | 3.5 | 3.464286 | 3.40625 | 3.659091 | 3.653846 | 3.666667 | 3.833333 |
| "127134" | 3.227273 | 3.571429 | 2.880952 | 3.277778 | 3.25 | 3.25 | 3.4 | 3.244186 | 3.0 | 3.5 |
| "185997" | 3.157895 | 2.5 | 2.442308 | 2.928571 | 2.543478 | 2.975 | 3.045455 | 2.666667 | 1.25 | 3.375 |
| "251922" | 2.543478 | 2.744898 | 2.264706 | 1.0 | 2.77027 | 1.958333 | 3.0 | 2.608696 | 2.5 | 3.0 |
time: 42.7 ms (started: 2024-04-26 12:35:52 -07:00)
In [ ]:
########### skip to 3 Prepare FT Data ###################
In [ ]:
# U.npy (56_318, 10) to do KMeans clustering to get labels
from yellowbrick.cluster import silhouette_visualizer
visualizer = silhouette_visualizer(KMeans(2, random_state=42), U)
time: 55.6 s (started: 2024-04-26 00:36:01 -07:00)
In [ ]:
# elbow viz
from yellowbrick.cluster import KElbowVisualizer
vis = KElbowVisualizer(KMeans(), k=range(5,100,5))
vis.fit(U)
vis.show()
Out[ ]:
<Axes: title={'center': 'Distortion Score Elbow for KMeans Clustering'}, xlabel='k', ylabel='distortion score'>
time: 2.64 s (started: 2024-04-26 00:39:37 -07:00)
In [ ]:
# elbow viz
from yellowbrick.cluster import KElbowVisualizer
vis = KElbowVisualizer(KMeans(), k=range(5,100,5), metric='silhouette')
vis.fit(U)
vis.show()
Out[ ]:
<Axes: title={'center': 'Silhouette Score Elbow for KMeans Clustering'}, xlabel='k', ylabel='silhouette score'>
time: 7min 23s (started: 2024-04-26 00:43:13 -07:00)
In [ ]:
# elbow viz
from yellowbrick.cluster import KElbowVisualizer
vis = KElbowVisualizer(KMeans(), k=range(5,100,5), metric='calinski_harabasz')
vis.fit(U)
vis.show()
Out[ ]:
<Axes: title={'center': 'Calinski Harabasz Score Elbow for KMeans Clustering'}, xlabel='k', ylabel='calinski harabasz score'>
time: 1.27 s (started: 2024-04-26 00:50:45 -07:00)
In [ ]:
# elbow viz
from yellowbrick.cluster import KElbowVisualizer
vis = KElbowVisualizer(KMeans(), k=range(20,40), metric='silhouette')
vis.fit(U)
vis.show()
Out[ ]:
<Axes: title={'center': 'Silhouette Score Elbow for KMeans Clustering'}, xlabel='k', ylabel='silhouette score'>
time: 7min 53s (started: 2024-04-26 00:51:02 -07:00)
In [ ]:
# elbow viz
from yellowbrick.cluster import KElbowVisualizer
vis = KElbowVisualizer(KMeans(), k=range(20,40))
vis.fit(U)
vis.show()
Out[ ]:
<Axes: title={'center': 'Distortion Score Elbow for KMeans Clustering'}, xlabel='k', ylabel='distortion score'>
time: 2.62 s (started: 2024-04-26 01:11:04 -07:00)
In [ ]:
# elbow viz
from yellowbrick.cluster import KElbowVisualizer
vis = KElbowVisualizer(KMeans(), k=range(10,40), metric='silhouette')
vis.fit(U)
vis.show()
Out[ ]:
<Axes: title={'center': 'Silhouette Score Elbow for KMeans Clustering'}, xlabel='k', ylabel='silhouette score'>
time: 11min 54s (started: 2024-04-26 00:59:03 -07:00)
2.4 UMAP + HDBSCAN¶
2.4.1 MF Embeddings¶
In [ ]:
# user embeddings from NMF
U0
Out[ ]:
array([[-2.13238908e-02, 1.44819752e-02, 6.02356211e-02, ...,
-2.10993512e-03, 1.06076370e-04, -6.22310116e-04],
[-1.94242450e-02, 2.64664853e-02, 4.19494100e-02, ...,
-1.50263473e-03, 4.69035863e-04, 1.42112823e-03],
[-1.54190095e-02, 3.42269345e-03, 4.71016766e-02, ...,
-4.83255914e-04, 6.66800463e-04, -4.94009473e-03],
...,
[-1.77246506e-03, -1.52386116e-03, -4.81911236e-03, ...,
9.96737901e-01, 1.50495452e-04, -1.64694205e-04],
[-1.21989953e-03, 1.79720073e-03, -2.77891537e-03, ...,
1.50093534e-04, 9.91962407e-01, -7.39255793e-05],
[-8.48773956e-04, -2.09768266e-03, -4.46399927e-04, ...,
-1.63915431e-04, 2.03453663e-04, 9.58858921e-01]])
time: 2.25 ms (started: 2024-04-26 02:18:46 -07:00)
In [ ]:
U0.shape
Out[ ]:
(56318, 56318)
time: 1.12 ms (started: 2024-04-26 03:03:12 -07:00)
In [ ]:
from sklearn.datasets import fetch_openml
from sklearn.decomposition import PCA
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Dimension reduction and clustering libraries
import umap
import hdbscan
import sklearn.cluster as cluster
from sklearn.metrics import adjusted_rand_score, adjusted_mutual_info_score
2024-04-26 03:03:55.300914: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. 2024-04-26 03:03:57.417630: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 2024-04-26 03:04:25.388514: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
time: 1min 29s (started: 2024-04-26 03:03:24 -07:00)
In [ ]:
from sklearn.decomposition import NMF
A = X_fill_zero[:,1:]
# Initialize NMF and fit it to the matrix A
nmf = NMF(n_components=5980, init='random', random_state=0)
U = nmf.fit_transform(A)
M = nmf.components_
# Print the resulting matrices
print(f"{U[:2]=}")
print(f"{M[:2]=}")
U[:2]=array([[2.9465047 , 7.57024189, 0. , 0.60239043, 0.55344382, 0. , 0.02233019, 0.9683429 , 0. , 4.84344654], [4.39981346, 9.07085948, 0. , 0.53721187, 0.68230197, 0. , 0. , 0.61367887, 0.13100777, 1.24841661]])
M[:2]=array([[0. , 0.09379923, 0. , ..., 0.00821147, 0. , 0.02712022], [0.13191784, 0.10947779, 0.153014 , ..., 0.01517629, 0.01982737, 0.00635432]])
time: 20.2 s (started: 2024-04-26 00:14:37 -07:00)
In [ ]:
3 Prepare Finetuning Data¶
In [ ]:
# check # of movies have metadata
movies = filtered_movie_ids.join(movie_counts, on='movieId', how='left')
movies
Out[ ]:
shape: (5_981, 6)
| movieId | count | imdbId | tmdbId | title | genres |
|---|---|---|---|---|---|
| i64 | u32 | i64 | i64 | str | str |
| 79132 | 47695 | 1375666 | 27205 | "Inception (2010)" | "Action|Crime|Drama|Mystery|Sci-Fi|Thriller|IMAX" |
| 2571 | 47209 | 133093 | 603 | "Matrix, The (1999)" | "Action|Sci-Fi|Thriller" |
| 318 | 44585 | 111161 | 278 | "Shawshank Redemption, The (1994)" | "Crime|Drama" |
| 58559 | 42725 | 468569 | 155 | "Dark Knight, The (2008)" | "Action|Crime|Drama|IMAX" |
| 2959 | 41295 | 137523 | 550 | "Fight Club (1999)" | "Action|Crime|Drama|Thriller" |
| … | … | … | … | … | … |
| 115967 | 200 | 2268458 | 199575 | "These Final Hours (2014)" | "Drama|Thriller" |
| 251922 | 200 | 6654210 | 581726 | "Infinite (2021)" | "Action|Sci-Fi|Thriller" |
| 7976 | 200 | 209077 | 7090 | "Ken Park (2002)" | "Drama" |
| 67620 | 200 | 1073241 | 14637 | "Nothing But the Truth (2008)" | "Drama|Thriller" |
| 185997 | 200 | 5619332 | 399796 | "Life of the Party (2018)" | "Comedy" |
time: 4.75 ms (started: 2024-04-26 12:24:01 -07:00)
In [ ]:
plots = pl.read_csv('../../../movieplot/mpst_full_data.csv')
plots = plots.with_columns(
imdbId=pl.col('imdb_id').str.replace(r'tt0*','').cast(pl.Int64, strict=False)
)
plots
Out[ ]:
shape: (14_828, 7)
| imdb_id | title | plot_synopsis | tags | split | synopsis_source | imdbId |
|---|---|---|---|---|---|---|
| str | str | str | str | str | str | i64 |
| "tt0057603" | "I tre volti della paura" | "Note: this synopsis is for the orginal Italian release with the segments in this certain order.Boris Karloff introduces three horror tales of the macabre and the supernatural known as the 'Three Face… | "cult, horror, gothic, murder, atmospheric" | "train" | "imdb" | 57603 |
| "tt1733125" | "Dungeons & Dragons: The Book of Vile Darkness" | "Two thousand years ago, Nhagruul the Foul, a sorcerer who reveled in corrupting the innocent and the spread of despair, neared the end of his mortal days and was dismayed. Consumed by hatred for the … | "violence" | "train" | "imdb" | 1733125 |
| "tt0033045" | "The Shop Around the Corner" | "Matuschek's, a gift store in Budapest, is the workplace of Alfred Kralik (James Stewart) and the newly hi Ed Klara Novak (Margaret Sullavan). At work they constantly irritate each other, but this dai… | "romantic" | "test" | "imdb" | 33045 |
| "tt0113862" | "Mr. Holland's Opus" | "Glenn Holland, not a morning person by anyone's standards, is woken up by his wife Iris early one bright September morning in 1964. Glenn has taken a job as a music teacher at the newly renamed John … | "inspiring, romantic, stupid, feel-good" | "train" | "imdb" | 113862 |
| "tt0086250" | "Scarface" | "In May 1980, a Cuban man named Tony Montana (Al Pacino) claims asylum, in Florida, USA, and is in search of the "American Dream" after departing Cuba in the Mariel boatlift of 1980. When questioned b… | "cruelty, murder, dramatic, cult, violence, atmospheric, action, romantic, revenge, sadist" | "val" | "imdb" | 86250 |
| … | … | … | … | … | … | … |
| "tt0219952" | "Lucky Numbers" | "In 1988 Russ Richards (John Travolta), the weatherman for a Harrisburg, Pennsylvania television station, is revered as a local celebrity by his viewers, and fame affords him such perks as a reserved … | "comedy, murder" | "test" | "wikipedia" | 219952 |
| "tt1371159" | "Iron Man 2" | "In Russia, the media covers Tony Stark's disclosure of his identity as Iron Man. Ivan Vanko, whose father Anton Vanko has just died, sees this and begins building a miniature arc reactor similar to S… | "good versus evil, violence" | "train" | "wikipedia" | 1371159 |
| "tt0063443" | "Play Dirty" | "During the North African Campaign in World War II, Captain Douglas (Caine) is a British Petroleum employee seconded to the Royal Engineers to oversee incoming fuel supplies for the British 8th Army. … | "anti war" | "train" | "wikipedia" | 63443 |
| "tt0039464" | "High Wall" | "Steven Kenet catches his unfaithful wife in the apartment of Willard I. Whitcombe, her boss, and apparently strangles her. Believing he killed her, he attempts to commit suicide by driving his car in… | "murder" | "test" | "wikipedia" | 39464 |
| "tt0235166" | "Against All Hope" | "Sometime in the 1950s in Chicago a man, Cecil Moe (Michael Madsen) returns home from work with his friend, Joe Cleveland. When Cecil arrives home, he finds his wife Jean and her friend Shannon pourin… | "christian film" | "test" | "wikipedia" | 235166 |
time: 781 ms (started: 2024-04-26 12:24:10 -07:00)
In [ ]:
# 1756 null plots, scrape later on IMDb
movies = movies.join(plots, on='imdbId', how='left')
movies['plot_synopsis'].is_null().sum()
Out[ ]:
1756
time: 3.48 ms (started: 2024-04-26 12:24:33 -07:00)
In [ ]:
FT_D = movies.drop_nulls().with_columns(
pl.col('movieId').cast(pl.Utf8)
).join(
M_C_L,
on='movieId'
)
FT_D.select(
['movieId', 'title', 'genres', 'imdb_id', 'plot_synopsis', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
)
Out[ ]:
shape: (4_225, 15)
| movieId | title | genres | imdb_id | plot_synopsis | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| str | str | str | str | str | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 | f64 |
| "79132" | "Inception (2010)" | "Action|Crime|Drama|Mystery|Sci-Fi|Thriller|IMAX" | "tt1375666" | "A young man, exhausted and delirious, washes up on a beach, looking up momentarily to see two young children (Claire Geare and Magnus Nolan) playing in the sand before he passes out. An armed guard (… | 4.340546 | 4.0 | 4.319876 | 3.845888 | 3.958701 | 3.951469 | 4.295302 | 3.730354 | 3.95517 | 4.374224 |
| "2571" | "Matrix, The (1999)" | "Action|Sci-Fi|Thriller" | "tt0133093" | "The screen is filled with green, cascading code which gives way to the title, The Matrix.A phone rings and text appears on the screen: "Call trans opt: received. 2-19-98 13:24:18 REC: Log>" As a conv… | 4.20889 | 4.40856 | 4.378527 | 4.064403 | 3.830385 | 4.143125 | 4.272826 | 3.839698 | 4.217858 | 4.48367 |
| "318" | "Shawshank Redemption, The (1994)" | "Crime|Drama" | "tt0111161" | "In 1947, Andy Dufresne (Tim Robbins), a banker in Maine, is convicted of murdering his wife and her lover, a golf pro. Since the state of Maine has no death penalty, he is given two consecutive life … | 4.516312 | 4.37247 | 4.416391 | 4.235414 | 4.043483 | 4.234203 | 4.412418 | 4.019305 | 4.345401 | 4.472696 |
| "58559" | "Dark Knight, The (2008)" | "Action|Crime|Drama|IMAX" | "tt0468569" | "The movie begins with a gang of men with clown masks breaking into the bank where the mob has a large portion of their money stashed. It begins with five clowns, each getting a cut of the spoils. The… | 4.292983 | 4.2607 | 4.315249 | 3.985607 | 3.94634 | 4.07098 | 4.237551 | 3.788321 | 4.075818 | 4.412461 |
| "2959" | "Fight Club (1999)" | "Action|Crime|Drama|Thriller" | "tt0137523" | "We back out of the webbing of neurons and brain cells as the title credits appear, finding ourselves emerging from the sweat-glistened skin of the protagonist: our narrator (Edward Norton), as he loo… | 4.420844 | 4.28937 | 4.309292 | 4.117811 | 3.972787 | 4.216733 | 4.271272 | 4.000476 | 4.135966 | 4.30737 |
| … | … | … | … | … | … | … | … | … | … | … | … | … | … | … |
| "93819" | "Absentia (2011)" | "Horror" | "tt1610996" | "As the film begins we see Tricia removing old posters and replacing them with new ones. When she comes home, she finds that her younger sister Callie has arrived. She comes to live with her as the pr… | 3.875 | 3.272727 | 3.111111 | 3.285714 | 3.475 | 3.147059 | 3.818182 | 3.166667 | 3.5 | 2.909091 |
| "81" | "Things to Do in Denver When You're Dead (1995)" | "Crime|Drama|Romance" | "tt0114660" | "Jimmy "The Saint" Tosnia is an ex-gangster living in Denver. Jimmy left the criminal world, to "go straight" with his "Afterlife Advice" business, where dying people videotape messages for their love… | 4.0 | 2.945652 | 3.0 | 2.9 | 2.9 | 3.026316 | 3.357143 | 2.583333 | 3.7 | 3.263158 |
| "3952" | "Contender, The (2000)" | "Drama|Thriller" | "tt0208874" | "Second-term Democratic U.S. President Jackson Evans must select a new Vice President following the sudden death of his previous vice president. The obvious choice seems to be Virginia Governor Jack H… | 4.0 | 3.518868 | 3.325 | 2.764706 | 3.5 | 3.107143 | 3.434783 | 3.764706 | 3.8 | 3.5 |
| "7976" | "Ken Park (2002)" | "Drama" | "tt0209077" | "The opening of the film depicts teenager Ken Park (nicknamed "Krap Nek", which is his first and last name spelled and pronounced backward) skateboarding across Visalia, California. He arrives at a sk… | 3.25 | 2.913043 | 3.52 | 3.285714 | 3.554054 | 3.038462 | 3.15 | 2.708333 | 3.833333 | 3.0 |
| "67620" | "Nothing But the Truth (2008)" | "Drama|Thriller" | "tt1073241" | "Robert Bennett (Richard Dix) is a stockbroker who is very carefree with other people's money. Encouraging clients to buy stocks in companies that are failing is all in a day's work to him. His fiancé… | 3.857143 | 3.603448 | 3.55 | 2.5 | 3.633333 | 3.277778 | 3.869565 | 3.5 | 2.0 | 3.369565 |
time: 6.34 ms (started: 2024-04-26 12:44:50 -07:00)
In [ ]:
FT_D.write_ndjson('../data/FT_D_raw_4225x15.jsonl')
time: 65.7 ms (started: 2024-04-26 12:59:13 -07:00)
In [ ]:
# now how to construct the jsonl file format to best represent what we want to do?
# DSPy package:
# https://github.com/stanfordnlp/dspy/blob/main/dspy/teleprompt/finetune.py
In [ ]:
In [ ]:
4 Finetune a BERT-based model¶
ftd_X = list(FT_D['plot_synopsis']) ftd_y = list(FT_D['0'])
In [ ]:
In [ ]:
In [ ]:
5 Finetune a GPT-based model¶
In [ ]:
In [ ]:
In [ ]:
In [ ]:
6 Finetune a Mixtral of Experts model¶
In [ ]:
# domain expert on movie industry, human interfaces
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: